1 /**********************************************************
2 * Copyright 1998-2013 VMware, Inc. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person
5 * obtaining a copy of this software and associated documentation
6 * files (the "Software"), to deal in the Software without
7 * restriction, including without limitation the rights to use, copy,
8 * modify, merge, publish, distribute, sublicense, and/or sell copies
9 * of the Software, and to permit persons to whom the Software is
10 * furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be
13 * included in all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 *
24 **********************************************************/
25
26 /**
27 * @file svga_tgsi_vgpu10.c
28 *
29 * TGSI -> VGPU10 shader translation.
30 *
31 * \author Mingcheng Chen
32 * \author Brian Paul
33 */
34
35 #include "pipe/p_compiler.h"
36 #include "pipe/p_shader_tokens.h"
37 #include "pipe/p_defines.h"
38 #include "tgsi/tgsi_build.h"
39 #include "tgsi/tgsi_dump.h"
40 #include "tgsi/tgsi_info.h"
41 #include "tgsi/tgsi_parse.h"
42 #include "tgsi/tgsi_scan.h"
43 #include "tgsi/tgsi_two_side.h"
44 #include "tgsi/tgsi_aa_point.h"
45 #include "tgsi/tgsi_util.h"
46 #include "util/u_math.h"
47 #include "util/u_memory.h"
48 #include "util/u_bitmask.h"
49 #include "util/u_debug.h"
50 #include "util/u_pstipple.h"
51
52 #include "svga_context.h"
53 #include "svga_debug.h"
54 #include "svga_link.h"
55 #include "svga_shader.h"
56 #include "svga_tgsi.h"
57
58 #include "VGPU10ShaderTokens.h"
59
60
61 #define INVALID_INDEX 99999
62 #define MAX_INTERNAL_TEMPS 3
63 #define MAX_SYSTEM_VALUES 4
64 #define MAX_IMMEDIATE_COUNT \
65 (VGPU10_MAX_IMMEDIATE_CONSTANT_BUFFER_ELEMENT_COUNT/4)
66 #define MAX_TEMP_ARRAYS 64 /* Enough? */
67
68
69 /**
 70  * Clipping is complicated.  There are four different cases which we
71 * handle during VS/GS shader translation:
72 */
73 enum clipping_mode
74 {
75 CLIP_NONE, /**< No clipping enabled */
76 CLIP_LEGACY, /**< The shader has no clipping declarations or code but
77 * one or more user-defined clip planes are enabled. We
78 * generate extra code to emit clip distances.
79 */
80 CLIP_DISTANCE, /**< The shader already declares clip distance output
81 * registers and has code to write to them.
82 */
83 CLIP_VERTEX /**< The shader declares a clip vertex output register and
84 * has code that writes to the register. We convert the
85 * clipvertex position into one or more clip distances.
86 */
87 };
88
89
90 struct svga_shader_emitter_v10
91 {
92 /* The token output buffer */
93 unsigned size;
94 char *buf;
95 char *ptr;
96
97 /* Information about the shader and state (does not change) */
98 struct svga_compile_key key;
99 struct tgsi_shader_info info;
100 unsigned unit;
101
102 unsigned inst_start_token;
103 boolean discard_instruction; /**< throw away current instruction? */
104
105 union tgsi_immediate_data immediates[MAX_IMMEDIATE_COUNT][4];
106 unsigned num_immediates; /**< Number of immediates emitted */
107 unsigned common_immediate_pos[8]; /**< literals for common immediates */
108 unsigned num_common_immediates;
109 boolean immediates_emitted;
110
 111    unsigned num_outputs;      /**< includes any extra outputs */
 112                               /**  The first extra output is reserved for
 113                                *   the non-adjusted vertex position, for
 114                                *   stream output purposes
 115                                */
116
117 /* Temporary Registers */
118 unsigned num_shader_temps; /**< num of temps used by original shader */
119 unsigned internal_temp_count; /**< currently allocated internal temps */
120 struct {
121 unsigned start, size;
122 } temp_arrays[MAX_TEMP_ARRAYS];
123 unsigned num_temp_arrays;
124
125 /** Map TGSI temp registers to VGPU10 temp array IDs and indexes */
126 struct {
127 unsigned arrayId, index;
128 } temp_map[VGPU10_MAX_TEMPS]; /**< arrayId, element */
129
130 /** Number of constants used by original shader for each constant buffer.
 131     * The size should probably always match that of svga_state.constbufs.
132 */
133 unsigned num_shader_consts[SVGA_MAX_CONST_BUFS];
134
135 /* Samplers */
136 unsigned num_samplers;
137
138 /* Address regs (really implemented with temps) */
139 unsigned num_address_regs;
140 unsigned address_reg_index[MAX_VGPU10_ADDR_REGS];
141
142 /* Output register usage masks */
143 ubyte output_usage_mask[PIPE_MAX_SHADER_OUTPUTS];
144
145 /* To map TGSI system value index to VGPU shader input indexes */
146 ubyte system_value_indexes[MAX_SYSTEM_VALUES];
147
148 struct {
149 /* vertex position scale/translation */
150 unsigned out_index; /**< the real position output reg */
151 unsigned tmp_index; /**< the fake/temp position output reg */
152 unsigned so_index; /**< the non-adjusted position output reg */
153 unsigned prescale_scale_index, prescale_trans_index;
154 boolean need_prescale;
155 } vposition;
156
157 /* For vertex shaders only */
158 struct {
159 /* viewport constant */
160 unsigned viewport_index;
161
162 /* temp index of adjusted vertex attributes */
163 unsigned adjusted_input[PIPE_MAX_SHADER_INPUTS];
164 } vs;
165
166 /* For fragment shaders only */
167 struct {
 168       /* alpha test */
169 unsigned color_out_index[PIPE_MAX_COLOR_BUFS]; /**< the real color output regs */
170 unsigned color_tmp_index; /**< fake/temp color output reg */
171 unsigned alpha_ref_index; /**< immediate constant for alpha ref */
172
173 /* front-face */
174 unsigned face_input_index; /**< real fragment shader face reg (bool) */
175 unsigned face_tmp_index; /**< temp face reg converted to -1 / +1 */
176
177 unsigned pstipple_sampler_unit;
178
179 unsigned fragcoord_input_index; /**< real fragment position input reg */
180 unsigned fragcoord_tmp_index; /**< 1/w modified position temp reg */
181 } fs;
182
183 /* For geometry shaders only */
184 struct {
185 VGPU10_PRIMITIVE prim_type;/**< VGPU10 primitive type */
186 VGPU10_PRIMITIVE_TOPOLOGY prim_topology; /**< VGPU10 primitive topology */
187 unsigned input_size; /**< size of input arrays */
188 unsigned prim_id_index; /**< primitive id register index */
189 unsigned max_out_vertices; /**< maximum number of output vertices */
190 } gs;
191
192 /* For vertex or geometry shaders */
193 enum clipping_mode clip_mode;
194 unsigned clip_dist_out_index; /**< clip distance output register index */
195 unsigned clip_dist_tmp_index; /**< clip distance temporary register */
196 unsigned clip_dist_so_index; /**< clip distance shadow copy */
197
198 /** Index of temporary holding the clipvertex coordinate */
199 unsigned clip_vertex_out_index; /**< clip vertex output register index */
200 unsigned clip_vertex_tmp_index; /**< clip vertex temporary index */
201
202 /* user clip plane constant slot indexes */
203 unsigned clip_plane_const[PIPE_MAX_CLIP_PLANES];
204
205 unsigned num_output_writes;
206 boolean constant_color_output;
207
208 boolean uses_flat_interp;
209
210 /* For all shaders: const reg index for RECT coord scaling */
211 unsigned texcoord_scale_index[PIPE_MAX_SAMPLERS];
212
213 /* For all shaders: const reg index for texture buffer size */
214 unsigned texture_buffer_size_index[PIPE_MAX_SAMPLERS];
215
216 /* VS/GS/FS Linkage info */
217 struct shader_linkage linkage;
218
219 bool register_overflow; /**< Set if we exceed a VGPU10 register limit */
220 };
221
222
223 static boolean
224 emit_post_helpers(struct svga_shader_emitter_v10 *emit);
225
226 static boolean
227 emit_vertex(struct svga_shader_emitter_v10 *emit,
228 const struct tgsi_full_instruction *inst);
229
230 static char err_buf[128];
231
232 static boolean
233 expand(struct svga_shader_emitter_v10 *emit)
234 {
235 char *new_buf;
236 unsigned newsize = emit->size * 2;
237
238 if (emit->buf != err_buf)
239 new_buf = REALLOC(emit->buf, emit->size, newsize);
240 else
241 new_buf = NULL;
242
243 if (!new_buf) {
244 emit->ptr = err_buf;
245 emit->buf = err_buf;
246 emit->size = sizeof(err_buf);
247 return FALSE;
248 }
249
250 emit->size = newsize;
251 emit->ptr = new_buf + (emit->ptr - emit->buf);
252 emit->buf = new_buf;
253 return TRUE;
254 }
255
256 /**
257 * Create and initialize a new svga_shader_emitter_v10 object.
258 */
259 static struct svga_shader_emitter_v10 *
260 alloc_emitter(void)
261 {
262 struct svga_shader_emitter_v10 *emit = CALLOC(1, sizeof(*emit));
263
264 if (!emit)
265 return NULL;
266
267 /* to initialize the output buffer */
268 emit->size = 512;
269 if (!expand(emit)) {
270 FREE(emit);
271 return NULL;
272 }
273 return emit;
274 }
275
276 /**
277 * Free an svga_shader_emitter_v10 object.
278 */
279 static void
280 free_emitter(struct svga_shader_emitter_v10 *emit)
281 {
282 assert(emit);
283 FREE(emit->buf); /* will be NULL if translation succeeded */
284 FREE(emit);
285 }
286
287 static inline boolean
288 reserve(struct svga_shader_emitter_v10 *emit,
289 unsigned nr_dwords)
290 {
291 while (emit->ptr - emit->buf + nr_dwords * sizeof(uint32) >= emit->size) {
292 if (!expand(emit))
293 return FALSE;
294 }
295
296 return TRUE;
297 }
298
299 static boolean
300 emit_dword(struct svga_shader_emitter_v10 *emit, uint32 dword)
301 {
302 if (!reserve(emit, 1))
303 return FALSE;
304
305 *(uint32 *)emit->ptr = dword;
306 emit->ptr += sizeof dword;
307 return TRUE;
308 }
309
310 static boolean
311 emit_dwords(struct svga_shader_emitter_v10 *emit,
312 const uint32 *dwords,
313 unsigned nr)
314 {
315 if (!reserve(emit, nr))
316 return FALSE;
317
318 memcpy(emit->ptr, dwords, nr * sizeof *dwords);
319 emit->ptr += nr * sizeof *dwords;
320 return TRUE;
321 }
322
323 /** Return the number of tokens in the emitter's buffer */
324 static unsigned
325 emit_get_num_tokens(const struct svga_shader_emitter_v10 *emit)
326 {
327 return (emit->ptr - emit->buf) / sizeof(unsigned);
328 }
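/* Worked example (illustrative): every VGPU10 token is one 32-bit dword, so
 * emitting a three-token instruction advances emit->ptr by 12 bytes and
 * increases emit_get_num_tokens() by 3.  reserve() keeps doubling the buffer
 * through expand() until the requested number of dwords fits.
 */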
329
330
331 /**
332 * Check for register overflow. If we overflow we'll set an
333 * error flag. This function can be called for register declarations
 334  * or for use as src/dst instruction operands.
 335  * \param operandType  the register type, one of VGPU10_OPERAND_TYPE_x
 336  *                     or VGPU10_OPCODE_DCL_x
337 * \param index the register index
338 */
339 static void
340 check_register_index(struct svga_shader_emitter_v10 *emit,
341 unsigned operandType, unsigned index)
342 {
343 bool overflow_before = emit->register_overflow;
344
345 switch (operandType) {
346 case VGPU10_OPERAND_TYPE_TEMP:
347 case VGPU10_OPERAND_TYPE_INDEXABLE_TEMP:
348 case VGPU10_OPCODE_DCL_TEMPS:
349 if (index >= VGPU10_MAX_TEMPS) {
350 emit->register_overflow = TRUE;
351 }
352 break;
353 case VGPU10_OPERAND_TYPE_CONSTANT_BUFFER:
354 case VGPU10_OPCODE_DCL_CONSTANT_BUFFER:
355 if (index >= VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT) {
356 emit->register_overflow = TRUE;
357 }
358 break;
359 case VGPU10_OPERAND_TYPE_INPUT:
360 case VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID:
361 case VGPU10_OPCODE_DCL_INPUT:
362 case VGPU10_OPCODE_DCL_INPUT_SGV:
363 case VGPU10_OPCODE_DCL_INPUT_SIV:
364 case VGPU10_OPCODE_DCL_INPUT_PS:
365 case VGPU10_OPCODE_DCL_INPUT_PS_SGV:
366 case VGPU10_OPCODE_DCL_INPUT_PS_SIV:
367 if ((emit->unit == PIPE_SHADER_VERTEX &&
368 index >= VGPU10_MAX_VS_INPUTS) ||
369 (emit->unit == PIPE_SHADER_GEOMETRY &&
370 index >= VGPU10_MAX_GS_INPUTS) ||
371 (emit->unit == PIPE_SHADER_FRAGMENT &&
372 index >= VGPU10_MAX_FS_INPUTS)) {
373 emit->register_overflow = TRUE;
374 }
375 break;
376 case VGPU10_OPERAND_TYPE_OUTPUT:
377 case VGPU10_OPCODE_DCL_OUTPUT:
378 case VGPU10_OPCODE_DCL_OUTPUT_SGV:
379 case VGPU10_OPCODE_DCL_OUTPUT_SIV:
380 if ((emit->unit == PIPE_SHADER_VERTEX &&
381 index >= VGPU10_MAX_VS_OUTPUTS) ||
382 (emit->unit == PIPE_SHADER_GEOMETRY &&
383 index >= VGPU10_MAX_GS_OUTPUTS) ||
384 (emit->unit == PIPE_SHADER_FRAGMENT &&
385 index >= VGPU10_MAX_FS_OUTPUTS)) {
386 emit->register_overflow = TRUE;
387 }
388 break;
389 case VGPU10_OPERAND_TYPE_SAMPLER:
390 case VGPU10_OPCODE_DCL_SAMPLER:
391 if (index >= VGPU10_MAX_SAMPLERS) {
392 emit->register_overflow = TRUE;
393 }
394 break;
395 case VGPU10_OPERAND_TYPE_RESOURCE:
396 case VGPU10_OPCODE_DCL_RESOURCE:
397 if (index >= VGPU10_MAX_RESOURCES) {
398 emit->register_overflow = TRUE;
399 }
400 break;
401 case VGPU10_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER:
402 if (index >= MAX_IMMEDIATE_COUNT) {
403 emit->register_overflow = TRUE;
404 }
405 break;
406 default:
407 assert(0);
408 ; /* nothing */
409 }
410
411 if (emit->register_overflow && !overflow_before) {
412 debug_printf("svga: vgpu10 register overflow (reg %u, index %u)\n",
413 operandType, index);
414 }
415 }
416
417
418 /**
419 * Examine misc state to determine the clipping mode.
420 */
421 static void
422 determine_clipping_mode(struct svga_shader_emitter_v10 *emit)
423 {
424 if (emit->info.num_written_clipdistance > 0) {
425 emit->clip_mode = CLIP_DISTANCE;
426 }
427 else if (emit->info.writes_clipvertex) {
428 emit->clip_mode = CLIP_VERTEX;
429 }
430 else if (emit->key.clip_plane_enable) {
431 emit->clip_mode = CLIP_LEGACY;
432 }
433 else {
434 emit->clip_mode = CLIP_NONE;
435 }
436 }
437
438
439 /**
440 * For clip distance register declarations and clip distance register
441 * writes we need to mask the declaration usage or instruction writemask
442 * (respectively) against the set of the really-enabled clipping planes.
 443  * (respectively) against the set of actually enabled clip planes.
444 * The piglit test spec/glsl-1.30/execution/clipping/vs-clip-distance-enables
445 * has a VS that writes to all 8 clip distance registers, but the plane enable
446 * flags are a subset of that.
447 *
448 * This function is used to apply the plane enable flags to the register
449 * declaration or instruction writemask.
450 *
451 * \param writemask the declaration usage mask or instruction writemask
452 * \param clip_reg_index which clip plane register is being declared/written.
 453  *                       The legal values are 0 and 1 (four clip planes per
 454  *                       register, for a total of 8 clip planes)
455 */
456 static unsigned
457 apply_clip_plane_mask(struct svga_shader_emitter_v10 *emit,
458 unsigned writemask, unsigned clip_reg_index)
459 {
460 unsigned shift;
461
462 assert(clip_reg_index < 2);
463
464 /* four clip planes per clip register: */
465 shift = clip_reg_index * 4;
466 writemask &= ((emit->key.clip_plane_enable >> shift) & 0xf);
467
468 return writemask;
469 }
470
471
472 /**
473 * Translate gallium shader type into VGPU10 type.
474 */
475 static VGPU10_PROGRAM_TYPE
476 translate_shader_type(unsigned type)
477 {
478 switch (type) {
479 case PIPE_SHADER_VERTEX:
480 return VGPU10_VERTEX_SHADER;
481 case PIPE_SHADER_GEOMETRY:
482 return VGPU10_GEOMETRY_SHADER;
483 case PIPE_SHADER_FRAGMENT:
484 return VGPU10_PIXEL_SHADER;
485 default:
486 assert(!"Unexpected shader type");
487 return VGPU10_VERTEX_SHADER;
488 }
489 }
490
491
492 /**
493 * Translate a TGSI_OPCODE_x into a VGPU10_OPCODE_x
494 * Note: we only need to translate the opcodes for "simple" instructions,
495 * as seen below. All other opcodes are handled/translated specially.
496 */
497 static VGPU10_OPCODE_TYPE
498 translate_opcode(unsigned opcode)
499 {
500 switch (opcode) {
501 case TGSI_OPCODE_MOV:
502 return VGPU10_OPCODE_MOV;
503 case TGSI_OPCODE_MUL:
504 return VGPU10_OPCODE_MUL;
505 case TGSI_OPCODE_ADD:
506 return VGPU10_OPCODE_ADD;
507 case TGSI_OPCODE_DP3:
508 return VGPU10_OPCODE_DP3;
509 case TGSI_OPCODE_DP4:
510 return VGPU10_OPCODE_DP4;
511 case TGSI_OPCODE_MIN:
512 return VGPU10_OPCODE_MIN;
513 case TGSI_OPCODE_MAX:
514 return VGPU10_OPCODE_MAX;
515 case TGSI_OPCODE_MAD:
516 return VGPU10_OPCODE_MAD;
517 case TGSI_OPCODE_SQRT:
518 return VGPU10_OPCODE_SQRT;
519 case TGSI_OPCODE_FRC:
520 return VGPU10_OPCODE_FRC;
521 case TGSI_OPCODE_FLR:
522 return VGPU10_OPCODE_ROUND_NI;
523 case TGSI_OPCODE_FSEQ:
524 return VGPU10_OPCODE_EQ;
525 case TGSI_OPCODE_FSGE:
526 return VGPU10_OPCODE_GE;
527 case TGSI_OPCODE_FSNE:
528 return VGPU10_OPCODE_NE;
529 case TGSI_OPCODE_DDX:
530 return VGPU10_OPCODE_DERIV_RTX;
531 case TGSI_OPCODE_DDY:
532 return VGPU10_OPCODE_DERIV_RTY;
533 case TGSI_OPCODE_RET:
534 return VGPU10_OPCODE_RET;
535 case TGSI_OPCODE_DIV:
536 return VGPU10_OPCODE_DIV;
537 case TGSI_OPCODE_IDIV:
538 return VGPU10_OPCODE_IDIV;
539 case TGSI_OPCODE_DP2:
540 return VGPU10_OPCODE_DP2;
541 case TGSI_OPCODE_BRK:
542 return VGPU10_OPCODE_BREAK;
543 case TGSI_OPCODE_IF:
544 return VGPU10_OPCODE_IF;
545 case TGSI_OPCODE_ELSE:
546 return VGPU10_OPCODE_ELSE;
547 case TGSI_OPCODE_ENDIF:
548 return VGPU10_OPCODE_ENDIF;
549 case TGSI_OPCODE_CEIL:
550 return VGPU10_OPCODE_ROUND_PI;
551 case TGSI_OPCODE_I2F:
552 return VGPU10_OPCODE_ITOF;
553 case TGSI_OPCODE_NOT:
554 return VGPU10_OPCODE_NOT;
555 case TGSI_OPCODE_TRUNC:
556 return VGPU10_OPCODE_ROUND_Z;
557 case TGSI_OPCODE_SHL:
558 return VGPU10_OPCODE_ISHL;
559 case TGSI_OPCODE_AND:
560 return VGPU10_OPCODE_AND;
561 case TGSI_OPCODE_OR:
562 return VGPU10_OPCODE_OR;
563 case TGSI_OPCODE_XOR:
564 return VGPU10_OPCODE_XOR;
565 case TGSI_OPCODE_CONT:
566 return VGPU10_OPCODE_CONTINUE;
567 case TGSI_OPCODE_EMIT:
568 return VGPU10_OPCODE_EMIT;
569 case TGSI_OPCODE_ENDPRIM:
570 return VGPU10_OPCODE_CUT;
571 case TGSI_OPCODE_BGNLOOP:
572 return VGPU10_OPCODE_LOOP;
573 case TGSI_OPCODE_ENDLOOP:
574 return VGPU10_OPCODE_ENDLOOP;
575 case TGSI_OPCODE_ENDSUB:
576 return VGPU10_OPCODE_RET;
577 case TGSI_OPCODE_NOP:
578 return VGPU10_OPCODE_NOP;
579 case TGSI_OPCODE_BREAKC:
580 return VGPU10_OPCODE_BREAKC;
581 case TGSI_OPCODE_END:
582 return VGPU10_OPCODE_RET;
583 case TGSI_OPCODE_F2I:
584 return VGPU10_OPCODE_FTOI;
585 case TGSI_OPCODE_IMAX:
586 return VGPU10_OPCODE_IMAX;
587 case TGSI_OPCODE_IMIN:
588 return VGPU10_OPCODE_IMIN;
589 case TGSI_OPCODE_UDIV:
590 case TGSI_OPCODE_UMOD:
591 case TGSI_OPCODE_MOD:
592 return VGPU10_OPCODE_UDIV;
593 case TGSI_OPCODE_IMUL_HI:
594 return VGPU10_OPCODE_IMUL;
595 case TGSI_OPCODE_INEG:
596 return VGPU10_OPCODE_INEG;
597 case TGSI_OPCODE_ISHR:
598 return VGPU10_OPCODE_ISHR;
599 case TGSI_OPCODE_ISGE:
600 return VGPU10_OPCODE_IGE;
601 case TGSI_OPCODE_ISLT:
602 return VGPU10_OPCODE_ILT;
603 case TGSI_OPCODE_F2U:
604 return VGPU10_OPCODE_FTOU;
605 case TGSI_OPCODE_UADD:
606 return VGPU10_OPCODE_IADD;
607 case TGSI_OPCODE_U2F:
608 return VGPU10_OPCODE_UTOF;
609 case TGSI_OPCODE_UCMP:
610 return VGPU10_OPCODE_MOVC;
611 case TGSI_OPCODE_UMAD:
612 return VGPU10_OPCODE_UMAD;
613 case TGSI_OPCODE_UMAX:
614 return VGPU10_OPCODE_UMAX;
615 case TGSI_OPCODE_UMIN:
616 return VGPU10_OPCODE_UMIN;
617 case TGSI_OPCODE_UMUL:
618 case TGSI_OPCODE_UMUL_HI:
619 return VGPU10_OPCODE_UMUL;
620 case TGSI_OPCODE_USEQ:
621 return VGPU10_OPCODE_IEQ;
622 case TGSI_OPCODE_USGE:
623 return VGPU10_OPCODE_UGE;
624 case TGSI_OPCODE_USHR:
625 return VGPU10_OPCODE_USHR;
626 case TGSI_OPCODE_USLT:
627 return VGPU10_OPCODE_ULT;
628 case TGSI_OPCODE_USNE:
629 return VGPU10_OPCODE_INE;
630 case TGSI_OPCODE_SWITCH:
631 return VGPU10_OPCODE_SWITCH;
632 case TGSI_OPCODE_CASE:
633 return VGPU10_OPCODE_CASE;
634 case TGSI_OPCODE_DEFAULT:
635 return VGPU10_OPCODE_DEFAULT;
636 case TGSI_OPCODE_ENDSWITCH:
637 return VGPU10_OPCODE_ENDSWITCH;
638 case TGSI_OPCODE_FSLT:
639 return VGPU10_OPCODE_LT;
640 case TGSI_OPCODE_ROUND:
641 return VGPU10_OPCODE_ROUND_NE;
642 default:
643 assert(!"Unexpected TGSI opcode in translate_opcode()");
644 return VGPU10_OPCODE_NOP;
645 }
646 }
647
648
649 /**
650 * Translate a TGSI register file type into a VGPU10 operand type.
651 * \param array is the TGSI_FILE_TEMPORARY register an array?
652 */
653 static VGPU10_OPERAND_TYPE
654 translate_register_file(enum tgsi_file_type file, boolean array)
655 {
656 switch (file) {
657 case TGSI_FILE_CONSTANT:
658 return VGPU10_OPERAND_TYPE_CONSTANT_BUFFER;
659 case TGSI_FILE_INPUT:
660 return VGPU10_OPERAND_TYPE_INPUT;
661 case TGSI_FILE_OUTPUT:
662 return VGPU10_OPERAND_TYPE_OUTPUT;
663 case TGSI_FILE_TEMPORARY:
664 return array ? VGPU10_OPERAND_TYPE_INDEXABLE_TEMP
665 : VGPU10_OPERAND_TYPE_TEMP;
666 case TGSI_FILE_IMMEDIATE:
 667       /* all immediates are currently 32-bit values, so
 668        * VGPU10_OPERAND_TYPE_IMMEDIATE64 is not possible yet.
669 */
670 return VGPU10_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER;
671 case TGSI_FILE_SAMPLER:
672 return VGPU10_OPERAND_TYPE_SAMPLER;
673 case TGSI_FILE_SYSTEM_VALUE:
674 return VGPU10_OPERAND_TYPE_INPUT;
675
676 /* XXX TODO more cases to finish */
677
678 default:
679 assert(!"Bad tgsi register file!");
680 return VGPU10_OPERAND_TYPE_NULL;
681 }
682 }
683
684
685 /**
686 * Emit a null dst register
687 */
688 static void
689 emit_null_dst_register(struct svga_shader_emitter_v10 *emit)
690 {
691 VGPU10OperandToken0 operand;
692
693 operand.value = 0;
694 operand.operandType = VGPU10_OPERAND_TYPE_NULL;
695 operand.numComponents = VGPU10_OPERAND_0_COMPONENT;
696
697 emit_dword(emit, operand.value);
698 }
699
700
701 /**
702 * If the given register is a temporary, return the array ID.
703 * Else return zero.
704 */
705 static unsigned
706 get_temp_array_id(const struct svga_shader_emitter_v10 *emit,
707 unsigned file, unsigned index)
708 {
709 if (file == TGSI_FILE_TEMPORARY) {
710 return emit->temp_map[index].arrayId;
711 }
712 else {
713 return 0;
714 }
715 }
716
717
718 /**
719 * If the given register is a temporary, convert the index from a TGSI
720 * TEMPORARY index to a VGPU10 temp index.
721 */
722 static unsigned
723 remap_temp_index(const struct svga_shader_emitter_v10 *emit,
724 unsigned file, unsigned index)
725 {
726 if (file == TGSI_FILE_TEMPORARY) {
727 return emit->temp_map[index].index;
728 }
729 else {
730 return index;
731 }
732 }
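/* Example (illustrative): if TGSI TEMP[3] was placed in VGPU10 temp array 2
 * at element 5, then emit->temp_map[3] = {2, 5}: get_temp_array_id() returns
 * the array ID 2 and remap_temp_index() returns the element index 5.
 * Registers in other files pass through both helpers unchanged.
 */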
733
734
735 /**
736 * Setup the operand0 fields related to indexing (1D, 2D, relative, etc).
737 * Note: the operandType field must already be initialized.
738 */
739 static VGPU10OperandToken0
740 setup_operand0_indexing(struct svga_shader_emitter_v10 *emit,
741 VGPU10OperandToken0 operand0,
742 unsigned file,
743 boolean indirect, boolean index2D,
744 unsigned tempArrayID)
745 {
746 unsigned indexDim, index0Rep, index1Rep = VGPU10_OPERAND_INDEX_0D;
747
748 /*
749 * Compute index dimensions
750 */
751 if (operand0.operandType == VGPU10_OPERAND_TYPE_IMMEDIATE32 ||
752 operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID) {
753 /* there's no swizzle for in-line immediates */
754 indexDim = VGPU10_OPERAND_INDEX_0D;
755 assert(operand0.selectionMode == 0);
756 }
757 else {
758 if (index2D ||
759 tempArrayID > 0 ||
760 operand0.operandType == VGPU10_OPERAND_TYPE_CONSTANT_BUFFER) {
761 indexDim = VGPU10_OPERAND_INDEX_2D;
762 }
763 else {
764 indexDim = VGPU10_OPERAND_INDEX_1D;
765 }
766 }
767
768 /*
769 * Compute index representations (immediate, relative, etc).
770 */
771 if (tempArrayID > 0) {
772 assert(file == TGSI_FILE_TEMPORARY);
773 /* First index is the array ID, second index is the array element */
774 index0Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32;
775 if (indirect) {
776 index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE;
777 }
778 else {
779 index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32;
780 }
781 }
782 else if (indirect) {
783 if (file == TGSI_FILE_CONSTANT) {
784 /* index[0] indicates which constant buffer while index[1] indicates
785 * the position in the constant buffer.
786 */
787 index0Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32;
788 index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE;
789 }
790 else {
791 /* All other register files are 1-dimensional */
792 index0Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE;
793 }
794 }
795 else {
796 index0Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32;
797 index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32;
798 }
799
800 operand0.indexDimension = indexDim;
801 operand0.index0Representation = index0Rep;
802 operand0.index1Representation = index1Rep;
803
804 return operand0;
805 }
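/* Example (illustrative): a constant-buffer operand such as CONST[0][7] uses
 * VGPU10_OPERAND_INDEX_2D indexing: the first index (0) selects the buffer
 * and the second (7) the element, both VGPU10_OPERAND_INDEX_IMMEDIATE32.
 * If the element index is relative (e.g. CONST[0][ADDR[0].x + 7]) the second
 * index becomes VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE.  Indexable
 * temps work the same way, with the temp array ID as the first index.
 */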
806
807
808 /**
809 * Emit the operand for expressing an address register for indirect indexing.
810 * Note that the address register is really just a temp register.
811 * \param addr_reg_index which address register to use
812 */
813 static void
814 emit_indirect_register(struct svga_shader_emitter_v10 *emit,
815 unsigned addr_reg_index)
816 {
817 unsigned tmp_reg_index;
818 VGPU10OperandToken0 operand0;
819
820 assert(addr_reg_index < MAX_VGPU10_ADDR_REGS);
821
822 tmp_reg_index = emit->address_reg_index[addr_reg_index];
823
824 /* operand0 is a simple temporary register, selecting one component */
825 operand0.value = 0;
826 operand0.operandType = VGPU10_OPERAND_TYPE_TEMP;
827 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
828 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
829 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
830 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
831 operand0.swizzleX = 0;
832 operand0.swizzleY = 1;
833 operand0.swizzleZ = 2;
834 operand0.swizzleW = 3;
835
836 emit_dword(emit, operand0.value);
837 emit_dword(emit, remap_temp_index(emit, TGSI_FILE_TEMPORARY, tmp_reg_index));
838 }
839
840
841 /**
842 * Translate the dst register of a TGSI instruction and emit VGPU10 tokens.
843 * \param emit the emitter context
844 * \param reg the TGSI dst register to translate
845 */
846 static void
847 emit_dst_register(struct svga_shader_emitter_v10 *emit,
848 const struct tgsi_full_dst_register *reg)
849 {
850 unsigned file = reg->Register.File;
851 unsigned index = reg->Register.Index;
852 const unsigned sem_name = emit->info.output_semantic_name[index];
853 const unsigned sem_index = emit->info.output_semantic_index[index];
854 unsigned writemask = reg->Register.WriteMask;
855 const unsigned indirect = reg->Register.Indirect;
856 const unsigned tempArrayId = get_temp_array_id(emit, file, index);
857 const unsigned index2d = reg->Register.Dimension;
858 VGPU10OperandToken0 operand0;
859
860 if (file == TGSI_FILE_OUTPUT) {
861 if (emit->unit == PIPE_SHADER_VERTEX ||
862 emit->unit == PIPE_SHADER_GEOMETRY) {
863 if (index == emit->vposition.out_index &&
864 emit->vposition.tmp_index != INVALID_INDEX) {
865 /* replace OUTPUT[POS] with TEMP[POS]. We need to store the
866 * vertex position result in a temporary so that we can modify
867 * it in the post_helper() code.
868 */
869 file = TGSI_FILE_TEMPORARY;
870 index = emit->vposition.tmp_index;
871 }
872 else if (sem_name == TGSI_SEMANTIC_CLIPDIST &&
873 emit->clip_dist_tmp_index != INVALID_INDEX) {
874 /* replace OUTPUT[CLIPDIST] with TEMP[CLIPDIST].
875 * We store the clip distance in a temporary first, then
876 * we'll copy it to the shadow copy and to CLIPDIST with the
877 * enabled planes mask in emit_clip_distance_instructions().
878 */
879 file = TGSI_FILE_TEMPORARY;
880 index = emit->clip_dist_tmp_index + sem_index;
881 }
882 else if (sem_name == TGSI_SEMANTIC_CLIPVERTEX &&
883 emit->clip_vertex_tmp_index != INVALID_INDEX) {
884 /* replace the CLIPVERTEX output register with a temporary */
885 assert(emit->clip_mode == CLIP_VERTEX);
886 assert(sem_index == 0);
887 file = TGSI_FILE_TEMPORARY;
888 index = emit->clip_vertex_tmp_index;
889 }
890 }
891 else if (emit->unit == PIPE_SHADER_FRAGMENT) {
892 if (sem_name == TGSI_SEMANTIC_POSITION) {
893 /* Fragment depth output register */
894 operand0.value = 0;
895 operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_DEPTH;
896 operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
897 operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
898 emit_dword(emit, operand0.value);
899 return;
900 }
901 else if (index == emit->fs.color_out_index[0] &&
902 emit->fs.color_tmp_index != INVALID_INDEX) {
903 /* replace OUTPUT[COLOR] with TEMP[COLOR]. We need to store the
 904              * fragment color result in a temporary so that we can read
 905              * it in the post_helper() code.
906 */
907 file = TGSI_FILE_TEMPORARY;
908 index = emit->fs.color_tmp_index;
909 }
910 else {
911 /* Typically, for fragment shaders, the output register index
912 * matches the color semantic index. But not when we write to
913 * the fragment depth register. In that case, OUT[0] will be
914 * fragdepth and OUT[1] will be the 0th color output. We need
915 * to use the semantic index for color outputs.
916 */
917 assert(sem_name == TGSI_SEMANTIC_COLOR);
918 index = emit->info.output_semantic_index[index];
919
920 emit->num_output_writes++;
921 }
922 }
923 }
924
925 /* init operand tokens to all zero */
926 operand0.value = 0;
927
928 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
929
930 /* the operand has a writemask */
931 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
932
933 /* Which of the four dest components to write to. Note that we can use a
934 * simple assignment here since TGSI writemasks match VGPU10 writemasks.
935 */
936 STATIC_ASSERT(TGSI_WRITEMASK_X == VGPU10_OPERAND_4_COMPONENT_MASK_X);
937 operand0.mask = writemask;
938
939 /* translate TGSI register file type to VGPU10 operand type */
940 operand0.operandType = translate_register_file(file, tempArrayId > 0);
941
942 check_register_index(emit, operand0.operandType, index);
943
944 operand0 = setup_operand0_indexing(emit, operand0, file, indirect,
945 index2d, tempArrayId);
946
947 /* Emit tokens */
948 emit_dword(emit, operand0.value);
949 if (tempArrayId > 0) {
950 emit_dword(emit, tempArrayId);
951 }
952
953 emit_dword(emit, remap_temp_index(emit, file, index));
954
955 if (indirect) {
956 emit_indirect_register(emit, reg->Indirect.Index);
957 }
958 }
959
960
961 /**
962 * Translate a src register of a TGSI instruction and emit VGPU10 tokens.
963 */
964 static void
965 emit_src_register(struct svga_shader_emitter_v10 *emit,
966 const struct tgsi_full_src_register *reg)
967 {
968 unsigned file = reg->Register.File;
969 unsigned index = reg->Register.Index;
970 const unsigned indirect = reg->Register.Indirect;
971 const unsigned tempArrayId = get_temp_array_id(emit, file, index);
972 const unsigned index2d = reg->Register.Dimension;
973 const unsigned swizzleX = reg->Register.SwizzleX;
974 const unsigned swizzleY = reg->Register.SwizzleY;
975 const unsigned swizzleZ = reg->Register.SwizzleZ;
976 const unsigned swizzleW = reg->Register.SwizzleW;
977 const unsigned absolute = reg->Register.Absolute;
978 const unsigned negate = reg->Register.Negate;
979 bool is_prim_id = FALSE;
980
981 VGPU10OperandToken0 operand0;
982 VGPU10OperandToken1 operand1;
983
984 if (emit->unit == PIPE_SHADER_FRAGMENT &&
985 file == TGSI_FILE_INPUT) {
986 if (index == emit->fs.face_input_index) {
987 /* Replace INPUT[FACE] with TEMP[FACE] */
988 file = TGSI_FILE_TEMPORARY;
989 index = emit->fs.face_tmp_index;
990 }
991 else if (index == emit->fs.fragcoord_input_index) {
992 /* Replace INPUT[POSITION] with TEMP[POSITION] */
993 file = TGSI_FILE_TEMPORARY;
994 index = emit->fs.fragcoord_tmp_index;
995 }
996 else {
 997          /* We remap fragment shader inputs so that FS input indexes
998 * match up with VS/GS output indexes.
999 */
1000 index = emit->linkage.input_map[index];
1001 }
1002 }
1003 else if (emit->unit == PIPE_SHADER_GEOMETRY &&
1004 file == TGSI_FILE_INPUT) {
1005 is_prim_id = (index == emit->gs.prim_id_index);
1006 index = emit->linkage.input_map[index];
1007 }
1008 else if (emit->unit == PIPE_SHADER_VERTEX) {
1009 if (file == TGSI_FILE_INPUT) {
1010 /* if input is adjusted... */
1011 if ((emit->key.vs.adjust_attrib_w_1 |
1012 emit->key.vs.adjust_attrib_itof |
1013 emit->key.vs.adjust_attrib_utof |
1014 emit->key.vs.attrib_is_bgra |
1015 emit->key.vs.attrib_puint_to_snorm |
1016 emit->key.vs.attrib_puint_to_uscaled |
1017 emit->key.vs.attrib_puint_to_sscaled) & (1 << index)) {
1018 file = TGSI_FILE_TEMPORARY;
1019 index = emit->vs.adjusted_input[index];
1020 }
1021 }
1022 else if (file == TGSI_FILE_SYSTEM_VALUE) {
1023 assert(index < Elements(emit->system_value_indexes));
1024 index = emit->system_value_indexes[index];
1025 }
1026 }
1027
1028 operand0.value = operand1.value = 0;
1029
1030 if (is_prim_id) {
1031 operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
1032 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
1033 }
1034 else {
1035 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1036 operand0.operandType = translate_register_file(file, tempArrayId > 0);
1037 }
1038
1039 operand0 = setup_operand0_indexing(emit, operand0, file, indirect,
1040 index2d, tempArrayId);
1041
1042 if (operand0.operandType != VGPU10_OPERAND_TYPE_IMMEDIATE32 &&
1043 operand0.operandType != VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID) {
1044 /* there's no swizzle for in-line immediates */
1045 if (swizzleX == swizzleY &&
1046 swizzleX == swizzleZ &&
1047 swizzleX == swizzleW) {
1048 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
1049 }
1050 else {
1051 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
1052 }
1053
1054 operand0.swizzleX = swizzleX;
1055 operand0.swizzleY = swizzleY;
1056 operand0.swizzleZ = swizzleZ;
1057 operand0.swizzleW = swizzleW;
1058
1059 if (absolute || negate) {
1060 operand0.extended = 1;
1061 operand1.extendedOperandType = VGPU10_EXTENDED_OPERAND_MODIFIER;
1062 if (absolute && !negate)
1063 operand1.operandModifier = VGPU10_OPERAND_MODIFIER_ABS;
1064 if (!absolute && negate)
1065 operand1.operandModifier = VGPU10_OPERAND_MODIFIER_NEG;
1066 if (absolute && negate)
1067 operand1.operandModifier = VGPU10_OPERAND_MODIFIER_ABSNEG;
1068 }
1069 }
1070
1071 /* Emit the operand tokens */
1072 emit_dword(emit, operand0.value);
1073 if (operand0.extended)
1074 emit_dword(emit, operand1.value);
1075
1076 if (operand0.operandType == VGPU10_OPERAND_TYPE_IMMEDIATE32) {
1077 /* Emit the four float/int in-line immediate values */
1078 unsigned *c;
1079 assert(index < Elements(emit->immediates));
1080 assert(file == TGSI_FILE_IMMEDIATE);
1081 assert(swizzleX < 4);
1082 assert(swizzleY < 4);
1083 assert(swizzleZ < 4);
1084 assert(swizzleW < 4);
1085 c = (unsigned *) emit->immediates[index];
1086 emit_dword(emit, c[swizzleX]);
1087 emit_dword(emit, c[swizzleY]);
1088 emit_dword(emit, c[swizzleZ]);
1089 emit_dword(emit, c[swizzleW]);
1090 }
1091 else if (operand0.indexDimension >= VGPU10_OPERAND_INDEX_1D) {
1092 /* Emit the register index(es) */
1093 if (index2d ||
1094 operand0.operandType == VGPU10_OPERAND_TYPE_CONSTANT_BUFFER) {
1095 emit_dword(emit, reg->Dimension.Index);
1096 }
1097
1098 if (tempArrayId > 0) {
1099 emit_dword(emit, tempArrayId);
1100 }
1101
1102 emit_dword(emit, remap_temp_index(emit, file, index));
1103
1104 if (indirect) {
1105 emit_indirect_register(emit, reg->Indirect.Index);
1106 }
1107 }
1108 }
1109
1110
1111 /**
1112 * Emit a resource operand (for use with a SAMPLE instruction).
1113 */
1114 static void
1115 emit_resource_register(struct svga_shader_emitter_v10 *emit,
1116 unsigned resource_number)
1117 {
1118 VGPU10OperandToken0 operand0;
1119
1120 check_register_index(emit, VGPU10_OPERAND_TYPE_RESOURCE, resource_number);
1121
1122 /* init */
1123 operand0.value = 0;
1124
1125 operand0.operandType = VGPU10_OPERAND_TYPE_RESOURCE;
1126 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
1127 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1128 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
1129 operand0.swizzleX = VGPU10_COMPONENT_X;
1130 operand0.swizzleY = VGPU10_COMPONENT_Y;
1131 operand0.swizzleZ = VGPU10_COMPONENT_Z;
1132 operand0.swizzleW = VGPU10_COMPONENT_W;
1133
1134 emit_dword(emit, operand0.value);
1135 emit_dword(emit, resource_number);
1136 }
1137
1138
1139 /**
1140 * Emit a sampler operand (for use with a SAMPLE instruction).
1141 */
1142 static void
1143 emit_sampler_register(struct svga_shader_emitter_v10 *emit,
1144 unsigned sampler_number)
1145 {
1146 VGPU10OperandToken0 operand0;
1147
1148 check_register_index(emit, VGPU10_OPERAND_TYPE_SAMPLER, sampler_number);
1149
1150 /* init */
1151 operand0.value = 0;
1152
1153 operand0.operandType = VGPU10_OPERAND_TYPE_SAMPLER;
1154 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
1155
1156 emit_dword(emit, operand0.value);
1157 emit_dword(emit, sampler_number);
1158 }
1159
1160
1161 /**
1162 * Emit an operand which reads the IS_FRONT_FACING register.
1163 */
1164 static void
1165 emit_face_register(struct svga_shader_emitter_v10 *emit)
1166 {
1167 VGPU10OperandToken0 operand0;
1168 unsigned index = emit->linkage.input_map[emit->fs.face_input_index];
1169
1170 /* init */
1171 operand0.value = 0;
1172
1173 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT;
1174 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
1175 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
1176 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1177
1178 operand0.swizzleX = VGPU10_COMPONENT_X;
1179 operand0.swizzleY = VGPU10_COMPONENT_X;
1180 operand0.swizzleZ = VGPU10_COMPONENT_X;
1181 operand0.swizzleW = VGPU10_COMPONENT_X;
1182
1183 emit_dword(emit, operand0.value);
1184 emit_dword(emit, index);
1185 }
1186
1187
1188 /**
1189 * Emit the token for a VGPU10 opcode.
1190 * \param saturate clamp result to [0,1]?
1191 */
1192 static void
1193 emit_opcode(struct svga_shader_emitter_v10 *emit,
1194 unsigned vgpu10_opcode, boolean saturate)
1195 {
1196 VGPU10OpcodeToken0 token0;
1197
1198 token0.value = 0; /* init all fields to zero */
1199 token0.opcodeType = vgpu10_opcode;
1200 token0.instructionLength = 0; /* Filled in by end_emit_instruction() */
1201 token0.saturate = saturate;
1202
1203 emit_dword(emit, token0.value);
1204 }
1205
1206
1207 /**
1208 * Emit the token for a VGPU10 resinfo instruction.
1209 * \param modifier return type modifier, _uint or _rcpFloat.
1210 * TODO: We may want to remove this parameter if it will
1211 * only ever be used as _uint.
1212 */
1213 static void
1214 emit_opcode_resinfo(struct svga_shader_emitter_v10 *emit,
1215 VGPU10_RESINFO_RETURN_TYPE modifier)
1216 {
1217 VGPU10OpcodeToken0 token0;
1218
1219 token0.value = 0; /* init all fields to zero */
1220 token0.opcodeType = VGPU10_OPCODE_RESINFO;
1221 token0.instructionLength = 0; /* Filled in by end_emit_instruction() */
1222 token0.resinfoReturnType = modifier;
1223
1224 emit_dword(emit, token0.value);
1225 }
1226
1227
1228 /**
1229 * Emit opcode tokens for a texture sample instruction. Texture instructions
1230 * can be rather complicated (texel offsets, etc) so we have this specialized
1231 * function.
1232 */
1233 static void
1234 emit_sample_opcode(struct svga_shader_emitter_v10 *emit,
1235 unsigned vgpu10_opcode, boolean saturate,
1236 const int offsets[3])
1237 {
1238 VGPU10OpcodeToken0 token0;
1239 VGPU10OpcodeToken1 token1;
1240
1241 token0.value = 0; /* init all fields to zero */
1242 token0.opcodeType = vgpu10_opcode;
1243 token0.instructionLength = 0; /* Filled in by end_emit_instruction() */
1244 token0.saturate = saturate;
1245
1246 if (offsets[0] || offsets[1] || offsets[2]) {
1247 assert(offsets[0] >= VGPU10_MIN_TEXEL_FETCH_OFFSET);
1248 assert(offsets[1] >= VGPU10_MIN_TEXEL_FETCH_OFFSET);
1249 assert(offsets[2] >= VGPU10_MIN_TEXEL_FETCH_OFFSET);
1250 assert(offsets[0] <= VGPU10_MAX_TEXEL_FETCH_OFFSET);
1251 assert(offsets[1] <= VGPU10_MAX_TEXEL_FETCH_OFFSET);
1252 assert(offsets[2] <= VGPU10_MAX_TEXEL_FETCH_OFFSET);
1253
1254 token0.extended = 1;
1255 token1.value = 0;
1256 token1.opcodeType = VGPU10_EXTENDED_OPCODE_SAMPLE_CONTROLS;
1257 token1.offsetU = offsets[0];
1258 token1.offsetV = offsets[1];
1259 token1.offsetW = offsets[2];
1260 }
1261
1262 emit_dword(emit, token0.value);
1263 if (token0.extended) {
1264 emit_dword(emit, token1.value);
1265 }
1266 }
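/* Example (illustrative): a texture instruction with texel offsets such as
 * {1, -1, 0} produces an extended opcode: token0.extended is set and the
 * second token carries offsetU/V/W, each of which must lie within
 * [VGPU10_MIN_TEXEL_FETCH_OFFSET, VGPU10_MAX_TEXEL_FETCH_OFFSET].
 */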
1267
1268
1269 /**
1270 * Emit a DISCARD opcode token.
1271 * If nonzero is set, we'll discard the fragment if the X component is not 0.
1272 * Otherwise, we'll discard the fragment if the X component is 0.
1273 */
1274 static void
1275 emit_discard_opcode(struct svga_shader_emitter_v10 *emit, boolean nonzero)
1276 {
1277 VGPU10OpcodeToken0 opcode0;
1278
1279 opcode0.value = 0;
1280 opcode0.opcodeType = VGPU10_OPCODE_DISCARD;
1281 if (nonzero)
1282 opcode0.testBoolean = VGPU10_INSTRUCTION_TEST_NONZERO;
1283
1284 emit_dword(emit, opcode0.value);
1285 }
1286
1287
1288 /**
1289 * We need to call this before we begin emitting a VGPU10 instruction.
1290 */
1291 static void
1292 begin_emit_instruction(struct svga_shader_emitter_v10 *emit)
1293 {
1294 assert(emit->inst_start_token == 0);
1295 /* Save location of the instruction's VGPU10OpcodeToken0 token.
1296 * Note, we can't save a pointer because it would become invalid if
1297 * we have to realloc the output buffer.
1298 */
1299 emit->inst_start_token = emit_get_num_tokens(emit);
1300 }
1301
1302
1303 /**
1304 * We need to call this after we emit the last token of a VGPU10 instruction.
1305 * This function patches in the opcode token's instructionLength field.
1306 */
1307 static void
1308 end_emit_instruction(struct svga_shader_emitter_v10 *emit)
1309 {
1310 VGPU10OpcodeToken0 *tokens = (VGPU10OpcodeToken0 *) emit->buf;
1311 unsigned inst_length;
1312
1313 assert(emit->inst_start_token > 0);
1314
1315 if (emit->discard_instruction) {
1316 /* Back up the emit->ptr to where this instruction started so
1317 * that we discard the current instruction.
1318 */
1319 emit->ptr = (char *) (tokens + emit->inst_start_token);
1320 }
1321 else {
1322 /* Compute instruction length and patch that into the start of
1323 * the instruction.
1324 */
1325 inst_length = emit_get_num_tokens(emit) - emit->inst_start_token;
1326
1327 assert(inst_length > 0);
1328
1329 tokens[emit->inst_start_token].instructionLength = inst_length;
1330 }
1331
1332 emit->inst_start_token = 0; /* reset to zero for error checking */
1333 emit->discard_instruction = FALSE;
1334 }
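/* Sketch of a typical emit sequence built from the helpers above (purely
 * illustrative; the real instruction emitters combine them in various ways):
 *
 *    begin_emit_instruction(emit);
 *    emit_opcode(emit, VGPU10_OPCODE_ADD, FALSE);
 *    emit_dst_register(emit, &dst);
 *    emit_src_register(emit, &src0);
 *    emit_src_register(emit, &src1);
 *    end_emit_instruction(emit);   (patches token0.instructionLength)
 */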
1335
1336
1337 /**
1338 * Return index for a free temporary register.
1339 */
1340 static unsigned
1341 get_temp_index(struct svga_shader_emitter_v10 *emit)
1342 {
1343 assert(emit->internal_temp_count < MAX_INTERNAL_TEMPS);
1344 return emit->num_shader_temps + emit->internal_temp_count++;
1345 }
1346
1347
1348 /**
1349 * Release the temporaries which were generated by get_temp_index().
1350 */
1351 static void
1352 free_temp_indexes(struct svga_shader_emitter_v10 *emit)
1353 {
1354 emit->internal_temp_count = 0;
1355 }
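/* Usage note (illustrative): internal temps are allocated after the shader's
 * own temps.  With num_shader_temps == 10, the first get_temp_index() call
 * returns 10 and the second returns 11; free_temp_indexes() releases them
 * again once the current instruction's helper code has been emitted.
 */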
1356
1357
1358 /**
1359 * Create a tgsi_full_src_register.
1360 */
1361 static struct tgsi_full_src_register
1362 make_src_reg(unsigned file, unsigned index)
1363 {
1364 struct tgsi_full_src_register reg;
1365
1366 memset(&reg, 0, sizeof(reg));
1367 reg.Register.File = file;
1368 reg.Register.Index = index;
1369 reg.Register.SwizzleX = TGSI_SWIZZLE_X;
1370 reg.Register.SwizzleY = TGSI_SWIZZLE_Y;
1371 reg.Register.SwizzleZ = TGSI_SWIZZLE_Z;
1372 reg.Register.SwizzleW = TGSI_SWIZZLE_W;
1373 return reg;
1374 }
1375
1376
1377 /**
1378 * Create a tgsi_full_src_register for a temporary.
1379 */
1380 static struct tgsi_full_src_register
1381 make_src_temp_reg(unsigned index)
1382 {
1383 return make_src_reg(TGSI_FILE_TEMPORARY, index);
1384 }
1385
1386
1387 /**
1388 * Create a tgsi_full_src_register for a constant.
1389 */
1390 static struct tgsi_full_src_register
1391 make_src_const_reg(unsigned index)
1392 {
1393 return make_src_reg(TGSI_FILE_CONSTANT, index);
1394 }
1395
1396
1397 /**
1398 * Create a tgsi_full_src_register for an immediate constant.
1399 */
1400 static struct tgsi_full_src_register
1401 make_src_immediate_reg(unsigned index)
1402 {
1403 return make_src_reg(TGSI_FILE_IMMEDIATE, index);
1404 }
1405
1406
1407 /**
1408 * Create a tgsi_full_dst_register.
1409 */
1410 static struct tgsi_full_dst_register
1411 make_dst_reg(unsigned file, unsigned index)
1412 {
1413 struct tgsi_full_dst_register reg;
1414
1415 memset(&reg, 0, sizeof(reg));
1416 reg.Register.File = file;
1417 reg.Register.Index = index;
1418 reg.Register.WriteMask = TGSI_WRITEMASK_XYZW;
1419 return reg;
1420 }
1421
1422
1423 /**
1424 * Create a tgsi_full_dst_register for a temporary.
1425 */
1426 static struct tgsi_full_dst_register
1427 make_dst_temp_reg(unsigned index)
1428 {
1429 return make_dst_reg(TGSI_FILE_TEMPORARY, index);
1430 }
1431
1432
1433 /**
1434 * Create a tgsi_full_dst_register for an output.
1435 */
1436 static struct tgsi_full_dst_register
1437 make_dst_output_reg(unsigned index)
1438 {
1439 return make_dst_reg(TGSI_FILE_OUTPUT, index);
1440 }
1441
1442
1443 /**
1444 * Create negated tgsi_full_src_register.
1445 */
1446 static struct tgsi_full_src_register
1447 negate_src(const struct tgsi_full_src_register *reg)
1448 {
1449 struct tgsi_full_src_register neg = *reg;
1450 neg.Register.Negate = !reg->Register.Negate;
1451 return neg;
1452 }
1453
1454 /**
1455 * Create absolute value of a tgsi_full_src_register.
1456 */
1457 static struct tgsi_full_src_register
1458 absolute_src(const struct tgsi_full_src_register *reg)
1459 {
1460 struct tgsi_full_src_register absolute = *reg;
1461 absolute.Register.Absolute = 1;
1462 return absolute;
1463 }
1464
1465
1466 /** Return the named swizzle term from the src register */
1467 static inline unsigned
1468 get_swizzle(const struct tgsi_full_src_register *reg, unsigned term)
1469 {
1470 switch (term) {
1471 case TGSI_SWIZZLE_X:
1472 return reg->Register.SwizzleX;
1473 case TGSI_SWIZZLE_Y:
1474 return reg->Register.SwizzleY;
1475 case TGSI_SWIZZLE_Z:
1476 return reg->Register.SwizzleZ;
1477 case TGSI_SWIZZLE_W:
1478 return reg->Register.SwizzleW;
1479 default:
1480 assert(!"Bad swizzle");
1481 return TGSI_SWIZZLE_X;
1482 }
1483 }
1484
1485
1486 /**
1487 * Create swizzled tgsi_full_src_register.
1488 */
1489 static struct tgsi_full_src_register
1490 swizzle_src(const struct tgsi_full_src_register *reg,
1491 unsigned swizzleX, unsigned swizzleY,
1492 unsigned swizzleZ, unsigned swizzleW)
1493 {
1494 struct tgsi_full_src_register swizzled = *reg;
1495 /* Note: we swizzle the current swizzle */
1496 swizzled.Register.SwizzleX = get_swizzle(reg, swizzleX);
1497 swizzled.Register.SwizzleY = get_swizzle(reg, swizzleY);
1498 swizzled.Register.SwizzleZ = get_swizzle(reg, swizzleZ);
1499 swizzled.Register.SwizzleW = get_swizzle(reg, swizzleW);
1500 return swizzled;
1501 }
1502
1503
1504 /**
1505 * Create swizzled tgsi_full_src_register where all the swizzle
1506 * terms are the same.
1507 */
1508 static struct tgsi_full_src_register
1509 scalar_src(const struct tgsi_full_src_register *reg, unsigned swizzle)
1510 {
1511 struct tgsi_full_src_register swizzled = *reg;
1512 /* Note: we swizzle the current swizzle */
1513 swizzled.Register.SwizzleX =
1514 swizzled.Register.SwizzleY =
1515 swizzled.Register.SwizzleZ =
1516 swizzled.Register.SwizzleW = get_swizzle(reg, swizzle);
1517 return swizzled;
1518 }
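/* Example (illustrative): for a source register whose current swizzle is
 * .yzwx, scalar_src(&reg, TGSI_SWIZZLE_X) yields .yyyy (we swizzle the
 * existing swizzle), while swizzle_src(&reg, TGSI_SWIZZLE_W, TGSI_SWIZZLE_Z,
 * TGSI_SWIZZLE_Y, TGSI_SWIZZLE_X) yields .xwzy.
 */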
1519
1520
1521 /**
1522 * Create new tgsi_full_dst_register with writemask.
1523 * \param mask bitmask of TGSI_WRITEMASK_[XYZW]
1524 */
1525 static struct tgsi_full_dst_register
1526 writemask_dst(const struct tgsi_full_dst_register *reg, unsigned mask)
1527 {
1528 struct tgsi_full_dst_register masked = *reg;
1529 masked.Register.WriteMask = mask;
1530 return masked;
1531 }
1532
1533
1534 /**
1535 * Check if the register's swizzle is XXXX, YYYY, ZZZZ, or WWWW.
1536 */
1537 static boolean
1538 same_swizzle_terms(const struct tgsi_full_src_register *reg)
1539 {
1540 return (reg->Register.SwizzleX == reg->Register.SwizzleY &&
1541 reg->Register.SwizzleY == reg->Register.SwizzleZ &&
1542 reg->Register.SwizzleZ == reg->Register.SwizzleW);
1543 }
1544
1545
1546 /**
1547 * Search the vector for the value 'x' and return its position.
1548 */
1549 static int
1550 find_imm_in_vec4(const union tgsi_immediate_data vec[4],
1551 union tgsi_immediate_data x)
1552 {
1553 unsigned i;
1554 for (i = 0; i < 4; i++) {
1555 if (vec[i].Int == x.Int)
1556 return i;
1557 }
1558 return -1;
1559 }
1560
1561
1562 /**
1563 * Helper used by make_immediate_reg(), make_immediate_reg_4().
1564 */
1565 static int
1566 find_immediate(struct svga_shader_emitter_v10 *emit,
1567 union tgsi_immediate_data x, unsigned startIndex)
1568 {
1569 const unsigned endIndex = emit->num_immediates;
1570 unsigned i;
1571
1572 assert(emit->immediates_emitted);
1573
1574 /* Search immediates for x, y, z, w */
1575 for (i = startIndex; i < endIndex; i++) {
1576 if (x.Int == emit->immediates[i][0].Int ||
1577 x.Int == emit->immediates[i][1].Int ||
1578 x.Int == emit->immediates[i][2].Int ||
1579 x.Int == emit->immediates[i][3].Int) {
1580 return i;
1581 }
1582 }
1583 /* Should never try to use an immediate value that wasn't pre-declared */
1584 assert(!"find_immediate() failed!");
1585 return -1;
1586 }
1587
1588
1589 /**
1590 * Return a tgsi_full_src_register for an immediate/literal
1591 * union tgsi_immediate_data[4] value.
1592 * Note: the values must have been previously declared/allocated in
1593 * emit_pre_helpers(). And, all of x,y,z,w must be located in the same
1594 * vec4 immediate.
1595 */
1596 static struct tgsi_full_src_register
1597 make_immediate_reg_4(struct svga_shader_emitter_v10 *emit,
1598 const union tgsi_immediate_data imm[4])
1599 {
1600 struct tgsi_full_src_register reg;
1601 unsigned i;
1602
1603 for (i = 0; i < emit->num_common_immediates; i++) {
1604 /* search for first component value */
1605 int immpos = find_immediate(emit, imm[0], i);
1606 int x, y, z, w;
1607
1608 assert(immpos >= 0);
1609
1610 /* find remaining components within the immediate vector */
1611 x = find_imm_in_vec4(emit->immediates[immpos], imm[0]);
1612 y = find_imm_in_vec4(emit->immediates[immpos], imm[1]);
1613 z = find_imm_in_vec4(emit->immediates[immpos], imm[2]);
1614 w = find_imm_in_vec4(emit->immediates[immpos], imm[3]);
1615
 1616       if (x >= 0 && y >= 0 && z >= 0 && w >= 0) {
1617 /* found them all */
1618 memset(&reg, 0, sizeof(reg));
1619 reg.Register.File = TGSI_FILE_IMMEDIATE;
1620 reg.Register.Index = immpos;
1621 reg.Register.SwizzleX = x;
1622 reg.Register.SwizzleY = y;
1623 reg.Register.SwizzleZ = z;
1624 reg.Register.SwizzleW = w;
1625 return reg;
1626 }
1627 /* else, keep searching */
1628 }
1629
1630 assert(!"Failed to find immediate register!");
1631
1632 /* Just return IMM[0].xxxx */
1633 memset(&reg, 0, sizeof(reg));
1634 reg.Register.File = TGSI_FILE_IMMEDIATE;
1635 return reg;
1636 }
1637
1638
1639 /**
1640 * Return a tgsi_full_src_register for an immediate/literal
1641 * union tgsi_immediate_data value of the form {value, value, value, value}.
1642 * \sa make_immediate_reg_4() regarding allowed values.
1643 */
1644 static struct tgsi_full_src_register
1645 make_immediate_reg(struct svga_shader_emitter_v10 *emit,
1646 union tgsi_immediate_data value)
1647 {
1648 struct tgsi_full_src_register reg;
1649 int immpos = find_immediate(emit, value, 0);
1650
1651 assert(immpos >= 0);
1652
1653 memset(&reg, 0, sizeof(reg));
1654 reg.Register.File = TGSI_FILE_IMMEDIATE;
1655 reg.Register.Index = immpos;
1656 reg.Register.SwizzleX =
1657 reg.Register.SwizzleY =
1658 reg.Register.SwizzleZ =
1659 reg.Register.SwizzleW = find_imm_in_vec4(emit->immediates[immpos], value);
1660
1661 return reg;
1662 }
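/* Usage note (illustrative): these helpers only reference immediates that
 * already exist.  A value such as 0.5f must have been allocated earlier with
 * alloc_immediate_float4() (see emit_pre_helpers()) or come from the shader's
 * own IMM declarations; otherwise find_immediate() asserts and returns -1.
 */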
1663
1664
1665 /**
1666 * Return a tgsi_full_src_register for an immediate/literal float[4] value.
1667 * \sa make_immediate_reg_4() regarding allowed values.
1668 */
1669 static struct tgsi_full_src_register
1670 make_immediate_reg_float4(struct svga_shader_emitter_v10 *emit,
1671 float x, float y, float z, float w)
1672 {
1673 union tgsi_immediate_data imm[4];
1674 imm[0].Float = x;
1675 imm[1].Float = y;
1676 imm[2].Float = z;
1677 imm[3].Float = w;
1678 return make_immediate_reg_4(emit, imm);
1679 }
1680
1681
1682 /**
1683 * Return a tgsi_full_src_register for an immediate/literal float value
1684 * of the form {value, value, value, value}.
1685 * \sa make_immediate_reg_4() regarding allowed values.
1686 */
1687 static struct tgsi_full_src_register
1688 make_immediate_reg_float(struct svga_shader_emitter_v10 *emit, float value)
1689 {
1690 union tgsi_immediate_data imm;
1691 imm.Float = value;
1692 return make_immediate_reg(emit, imm);
1693 }
1694
1695
1696 /**
1697 * Return a tgsi_full_src_register for an immediate/literal int[4] vector.
1698 */
1699 static struct tgsi_full_src_register
1700 make_immediate_reg_int4(struct svga_shader_emitter_v10 *emit,
1701 int x, int y, int z, int w)
1702 {
1703 union tgsi_immediate_data imm[4];
1704 imm[0].Int = x;
1705 imm[1].Int = y;
1706 imm[2].Int = z;
1707 imm[3].Int = w;
1708 return make_immediate_reg_4(emit, imm);
1709 }
1710
1711
1712 /**
1713 * Return a tgsi_full_src_register for an immediate/literal int value
1714 * of the form {value, value, value, value}.
1715 * \sa make_immediate_reg_4() regarding allowed values.
1716 */
1717 static struct tgsi_full_src_register
1718 make_immediate_reg_int(struct svga_shader_emitter_v10 *emit, int value)
1719 {
1720 union tgsi_immediate_data imm;
1721 imm.Int = value;
1722 return make_immediate_reg(emit, imm);
1723 }
1724
1725
1726 /**
1727 * Allocate space for a union tgsi_immediate_data[4] immediate.
1728 * \return the index/position of the immediate.
1729 */
1730 static unsigned
1731 alloc_immediate_4(struct svga_shader_emitter_v10 *emit,
1732 const union tgsi_immediate_data imm[4])
1733 {
1734 unsigned n = emit->num_immediates++;
1735 assert(!emit->immediates_emitted);
1736 assert(n < Elements(emit->immediates));
1737 emit->immediates[n][0] = imm[0];
1738 emit->immediates[n][1] = imm[1];
1739 emit->immediates[n][2] = imm[2];
1740 emit->immediates[n][3] = imm[3];
1741 return n;
1742 }
1743
1744
1745 /**
1746 * Allocate space for a float[4] immediate.
1747 * \return the index/position of the immediate.
1748 */
1749 static unsigned
1750 alloc_immediate_float4(struct svga_shader_emitter_v10 *emit,
1751 float x, float y, float z, float w)
1752 {
1753 union tgsi_immediate_data imm[4];
1754 imm[0].Float = x;
1755 imm[1].Float = y;
1756 imm[2].Float = z;
1757 imm[3].Float = w;
1758 return alloc_immediate_4(emit, imm);
1759 }
1760
1761
1762 /**
1763 * Allocate space for a int[4] immediate.
1764 * \return the index/position of the immediate.
1765 */
1766 static unsigned
1767 alloc_immediate_int4(struct svga_shader_emitter_v10 *emit,
1768 int x, int y, int z, int w)
1769 {
1770 union tgsi_immediate_data imm[4];
1771 imm[0].Int = x;
1772 imm[1].Int = y;
1773 imm[2].Int = z;
1774 imm[3].Int = w;
1775 return alloc_immediate_4(emit, imm);
1776 }
1777
1778
1779 /**
1780 * Allocate a shader input to store a system value.
1781 */
1782 static unsigned
1783 alloc_system_value_index(struct svga_shader_emitter_v10 *emit, unsigned index)
1784 {
1785 const unsigned n = emit->info.num_inputs + index;
1786 assert(index < Elements(emit->system_value_indexes));
1787 emit->system_value_indexes[index] = n;
1788 return n;
1789 }
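/* Example (illustrative): a vertex shader with 4 ordinary inputs that reads
 * one system value gets that value assigned to input register 4, i.e.
 * system values are simply appended after the regular inputs.
 */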
1790
1791
1792 /**
1793 * Translate a TGSI immediate value (union tgsi_immediate_data[4]) to VGPU10.
1794 */
1795 static boolean
1796 emit_vgpu10_immediate(struct svga_shader_emitter_v10 *emit,
1797 const struct tgsi_full_immediate *imm)
1798 {
1799 /* We don't actually emit any code here. We just save the
1800 * immediate values and emit them later.
1801 */
1802 alloc_immediate_4(emit, imm->u);
1803 return TRUE;
1804 }
1805
1806
1807 /**
1808 * Emit a VGPU10_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER block
1809 * containing all the immediate values previously allocated
1810 * with alloc_immediate_4().
1811 */
1812 static boolean
1813 emit_vgpu10_immediates_block(struct svga_shader_emitter_v10 *emit)
1814 {
1815 VGPU10OpcodeToken0 token;
1816
1817 assert(!emit->immediates_emitted);
1818
1819 token.value = 0;
1820 token.opcodeType = VGPU10_OPCODE_CUSTOMDATA;
1821 token.customDataClass = VGPU10_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER;
1822
1823 /* Note: no begin/end_emit_instruction() calls */
1824 emit_dword(emit, token.value);
1825 emit_dword(emit, 2 + 4 * emit->num_immediates);
1826 emit_dwords(emit, (unsigned *) emit->immediates, 4 * emit->num_immediates);
1827
1828 emit->immediates_emitted = TRUE;
1829
1830 return TRUE;
1831 }
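/* Layout example (illustrative): with num_immediates == 3 the block occupies
 * 14 dwords: the CUSTOMDATA opcode token, the length dword (2 + 4 * 3 = 14,
 * which counts itself and the opcode token), then 3 vec4s = 12 data dwords.
 */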
1832
1833
1834 /**
1835 * Translate a fragment shader's TGSI_INTERPOLATE_x mode to a vgpu10
1836 * interpolation mode.
1837 * \return a VGPU10_INTERPOLATION_x value
1838 */
1839 static unsigned
1840 translate_interpolation(const struct svga_shader_emitter_v10 *emit,
1841 unsigned interp, unsigned interpolate_loc)
1842 {
1843 if (interp == TGSI_INTERPOLATE_COLOR) {
1844 interp = emit->key.fs.flatshade ?
1845 TGSI_INTERPOLATE_CONSTANT : TGSI_INTERPOLATE_PERSPECTIVE;
1846 }
1847
1848 switch (interp) {
1849 case TGSI_INTERPOLATE_CONSTANT:
1850 return VGPU10_INTERPOLATION_CONSTANT;
1851 case TGSI_INTERPOLATE_LINEAR:
1852 return interpolate_loc == TGSI_INTERPOLATE_LOC_CENTROID ?
1853 VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID :
1854 VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE;
1855 case TGSI_INTERPOLATE_PERSPECTIVE:
1856 return interpolate_loc == TGSI_INTERPOLATE_LOC_CENTROID ?
1857 VGPU10_INTERPOLATION_LINEAR_CENTROID :
1858 VGPU10_INTERPOLATION_LINEAR;
1859 default:
1860 assert(!"Unexpected interpolation mode");
1861 return VGPU10_INTERPOLATION_CONSTANT;
1862 }
1863 }
1864
1865
1866 /**
1867 * Translate a TGSI property to VGPU10.
1868  * Don't emit any instructions yet; we only need to gather the primitive
1869  * property information here. The output primitive topology might be changed
1870  * later. The final property instructions are emitted as part of the pre-helper code.
1871 */
1872 static boolean
1873 emit_vgpu10_property(struct svga_shader_emitter_v10 *emit,
1874 const struct tgsi_full_property *prop)
1875 {
1876 static const VGPU10_PRIMITIVE primType[] = {
1877 VGPU10_PRIMITIVE_POINT, /* PIPE_PRIM_POINTS */
1878 VGPU10_PRIMITIVE_LINE, /* PIPE_PRIM_LINES */
1879 VGPU10_PRIMITIVE_LINE, /* PIPE_PRIM_LINE_LOOP */
1880 VGPU10_PRIMITIVE_LINE, /* PIPE_PRIM_LINE_STRIP */
1881 VGPU10_PRIMITIVE_TRIANGLE, /* PIPE_PRIM_TRIANGLES */
1882 VGPU10_PRIMITIVE_TRIANGLE, /* PIPE_PRIM_TRIANGLE_STRIP */
1883 VGPU10_PRIMITIVE_TRIANGLE, /* PIPE_PRIM_TRIANGLE_FAN */
1884 VGPU10_PRIMITIVE_UNDEFINED, /* PIPE_PRIM_QUADS */
1885 VGPU10_PRIMITIVE_UNDEFINED, /* PIPE_PRIM_QUAD_STRIP */
1886 VGPU10_PRIMITIVE_UNDEFINED, /* PIPE_PRIM_POLYGON */
1887 VGPU10_PRIMITIVE_LINE_ADJ, /* PIPE_PRIM_LINES_ADJACENCY */
1888 VGPU10_PRIMITIVE_LINE_ADJ, /* PIPE_PRIM_LINE_STRIP_ADJACENCY */
1889 VGPU10_PRIMITIVE_TRIANGLE_ADJ, /* PIPE_PRIM_TRIANGLES_ADJACENCY */
1890 VGPU10_PRIMITIVE_TRIANGLE_ADJ /* PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY */
1891 };
1892
1893 static const VGPU10_PRIMITIVE_TOPOLOGY primTopology[] = {
1894 VGPU10_PRIMITIVE_TOPOLOGY_POINTLIST, /* PIPE_PRIM_POINTS */
1895 VGPU10_PRIMITIVE_TOPOLOGY_LINELIST, /* PIPE_PRIM_LINES */
1896 VGPU10_PRIMITIVE_TOPOLOGY_LINELIST, /* PIPE_PRIM_LINE_LOOP */
1897 VGPU10_PRIMITIVE_TOPOLOGY_LINESTRIP, /* PIPE_PRIM_LINE_STRIP */
1898 VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLELIST, /* PIPE_PRIM_TRIANGLES */
1899 VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, /* PIPE_PRIM_TRIANGLE_STRIP */
1900 VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, /* PIPE_PRIM_TRIANGLE_FAN */
1901 VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED, /* PIPE_PRIM_QUADS */
1902 VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED, /* PIPE_PRIM_QUAD_STRIP */
1903 VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED, /* PIPE_PRIM_POLYGON */
1904 VGPU10_PRIMITIVE_TOPOLOGY_LINELIST_ADJ, /* PIPE_PRIM_LINES_ADJACENCY */
1905 VGPU10_PRIMITIVE_TOPOLOGY_LINELIST_ADJ, /* PIPE_PRIM_LINE_STRIP_ADJACENCY */
1906 VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ, /* PIPE_PRIM_TRIANGLES_ADJACENCY */
1907 VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ /* PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY */
1908 };
1909
1910 static const unsigned inputArraySize[] = {
1911 0, /* VGPU10_PRIMITIVE_UNDEFINED */
1912 1, /* VGPU10_PRIMITIVE_POINT */
1913 2, /* VGPU10_PRIMITIVE_LINE */
1914 3, /* VGPU10_PRIMITIVE_TRIANGLE */
1915 0,
1916 0,
1917 4, /* VGPU10_PRIMITIVE_LINE_ADJ */
1918 6 /* VGPU10_PRIMITIVE_TRIANGLE_ADJ */
1919 };
1920
1921 switch (prop->Property.PropertyName) {
1922 case TGSI_PROPERTY_GS_INPUT_PRIM:
1923 assert(prop->u[0].Data < Elements(primType));
1924 emit->gs.prim_type = primType[prop->u[0].Data];
1925 assert(emit->gs.prim_type != VGPU10_PRIMITIVE_UNDEFINED);
1926 emit->gs.input_size = inputArraySize[emit->gs.prim_type];
1927 break;
1928
1929 case TGSI_PROPERTY_GS_OUTPUT_PRIM:
1930 assert(prop->u[0].Data < Elements(primTopology));
1931 emit->gs.prim_topology = primTopology[prop->u[0].Data];
1932 assert(emit->gs.prim_topology != VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED);
1933 break;
1934
1935 case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES:
1936 emit->gs.max_out_vertices = prop->u[0].Data;
1937 break;
1938
1939 default:
1940 break;
1941 }
1942
1943 return TRUE;
1944 }
1945
1946
1947 static void
1948 emit_property_instruction(struct svga_shader_emitter_v10 *emit,
1949 VGPU10OpcodeToken0 opcode0, unsigned nData,
1950 unsigned data)
1951 {
1952 begin_emit_instruction(emit);
1953 emit_dword(emit, opcode0.value);
1954 if (nData)
1955 emit_dword(emit, data);
1956 end_emit_instruction(emit);
1957 }
1958
1959
1960 /**
1961 * Emit property instructions
1962 */
1963 static void
1964 emit_property_instructions(struct svga_shader_emitter_v10 *emit)
1965 {
1966 VGPU10OpcodeToken0 opcode0;
1967
1968 assert(emit->unit == PIPE_SHADER_GEOMETRY);
1969
1970 /* emit input primitive type declaration */
1971 opcode0.value = 0;
1972 opcode0.opcodeType = VGPU10_OPCODE_DCL_GS_INPUT_PRIMITIVE;
1973 opcode0.primitive = emit->gs.prim_type;
1974 emit_property_instruction(emit, opcode0, 0, 0);
1975
1976 /* emit output primitive topology declaration */
1977 opcode0.value = 0;
1978 opcode0.opcodeType = VGPU10_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY;
1979 opcode0.primitiveTopology = emit->gs.prim_topology;
1980 emit_property_instruction(emit, opcode0, 0, 0);
1981
1982 /* emit max output vertices */
1983 opcode0.value = 0;
1984 opcode0.opcodeType = VGPU10_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT;
1985 emit_property_instruction(emit, opcode0, 1, emit->gs.max_out_vertices);
1986 }
1987
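/* Illustrative example (not from the original source): a geometry shader
 * declared with
 *    GS_INPUT_PRIM          = PIPE_PRIM_TRIANGLES
 *    GS_OUTPUT_PRIM         = PIPE_PRIM_TRIANGLE_STRIP
 *    GS_MAX_OUTPUT_VERTICES = 4
 * is recorded by emit_vgpu10_property() as prim_type = VGPU10_PRIMITIVE_TRIANGLE
 * (input_size = 3) and prim_topology = VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP,
 * so the three declarations emitted above correspond to
 *    dcl_inputPrimitive triangle
 *    dcl_outputTopology trianglestrip
 *    dcl_maxOutputVertexCount 4
 */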
1988
1989 /**
1990 * Emit a vgpu10 declaration "instruction".
1991 * \param index the register index
1992 * \param size array size of the operand. In most cases, it is 1,
1993  *              but for geometry shader inputs, the array size varies
1994 * depending on the primitive type.
1995 */
1996 static void
1997 emit_decl_instruction(struct svga_shader_emitter_v10 *emit,
1998 VGPU10OpcodeToken0 opcode0,
1999 VGPU10OperandToken0 operand0,
2000 VGPU10NameToken name_token,
2001 unsigned index, unsigned size)
2002 {
2003 assert(opcode0.opcodeType);
2004 assert(operand0.mask);
2005
2006 begin_emit_instruction(emit);
2007 emit_dword(emit, opcode0.value);
2008
2009 emit_dword(emit, operand0.value);
2010
2011 if (operand0.indexDimension == VGPU10_OPERAND_INDEX_1D) {
2012 /* Next token is the index of the register to declare */
2013 emit_dword(emit, index);
2014 }
2015 else if (operand0.indexDimension >= VGPU10_OPERAND_INDEX_2D) {
2016 /* Next token is the size of the register */
2017 emit_dword(emit, size);
2018
2019 /* Followed by the index of the register */
2020 emit_dword(emit, index);
2021 }
2022
2023 if (name_token.value) {
2024 emit_dword(emit, name_token.value);
2025 }
2026
2027 end_emit_instruction(emit);
2028 }
2029
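/* Illustrative sketch (not from the original source): a simple 1D declaration
 * such as "dcl_input v3.xyzw" is emitted by the function above as
 *
 *    DWORD 0   opcode token  (VGPU10_OPCODE_DCL_INPUT; the instruction length
 *              is presumably patched by end_emit_instruction())
 *    DWORD 1   operand token (OPERAND_TYPE_INPUT, 1D immediate index, .xyzw)
 *    DWORD 2   register index (3)
 *
 * A 2D geometry-shader input declaration carries the input array size before
 * the register index, and system-value declarations append a name token.
 */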
2030
2031 /**
2032 * Emit the declaration for a shader input.
2033 * \param opcodeType opcode type, one of VGPU10_OPCODE_DCL_INPUTx
2034 * \param operandType operand type, one of VGPU10_OPERAND_TYPE_INPUT_x
2035 * \param dim index dimension
2036 * \param index the input register index
2037 * \param size array size of the operand. In most cases, it is 1,
2038  *              but for geometry shader inputs, the array size varies
2039 * depending on the primitive type.
2040 * \param name one of VGPU10_NAME_x
2041  * \param numComp  number of components
2042 * \param selMode component selection mode
2043 * \param usageMask bitfield of VGPU10_OPERAND_4_COMPONENT_MASK_x values
2044 * \param interpMode interpolation mode
2045 */
2046 static void
2047 emit_input_declaration(struct svga_shader_emitter_v10 *emit,
2048 unsigned opcodeType, unsigned operandType,
2049 unsigned dim, unsigned index, unsigned size,
2050 unsigned name, unsigned numComp,
2051 unsigned selMode, unsigned usageMask,
2052 unsigned interpMode)
2053 {
2054 VGPU10OpcodeToken0 opcode0;
2055 VGPU10OperandToken0 operand0;
2056 VGPU10NameToken name_token;
2057
2058 assert(usageMask <= VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
2059 assert(opcodeType == VGPU10_OPCODE_DCL_INPUT ||
2060 opcodeType == VGPU10_OPCODE_DCL_INPUT_SIV ||
2061 opcodeType == VGPU10_OPCODE_DCL_INPUT_PS ||
2062 opcodeType == VGPU10_OPCODE_DCL_INPUT_PS_SGV);
2063 assert(operandType == VGPU10_OPERAND_TYPE_INPUT ||
2064 operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID);
2065 assert(numComp <= VGPU10_OPERAND_4_COMPONENT);
2066 assert(selMode <= VGPU10_OPERAND_4_COMPONENT_MASK_MODE);
2067 assert(dim <= VGPU10_OPERAND_INDEX_3D);
2068 assert(name == VGPU10_NAME_UNDEFINED ||
2069 name == VGPU10_NAME_POSITION ||
2070 name == VGPU10_NAME_INSTANCE_ID ||
2071 name == VGPU10_NAME_VERTEX_ID ||
2072 name == VGPU10_NAME_PRIMITIVE_ID ||
2073 name == VGPU10_NAME_IS_FRONT_FACE);
2074 assert(interpMode == VGPU10_INTERPOLATION_UNDEFINED ||
2075 interpMode == VGPU10_INTERPOLATION_CONSTANT ||
2076 interpMode == VGPU10_INTERPOLATION_LINEAR ||
2077 interpMode == VGPU10_INTERPOLATION_LINEAR_CENTROID ||
2078 interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE ||
2079 interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID);
2080
2081 check_register_index(emit, opcodeType, index);
2082
2083 opcode0.value = operand0.value = name_token.value = 0;
2084
2085 opcode0.opcodeType = opcodeType;
2086 opcode0.interpolationMode = interpMode;
2087
2088 operand0.operandType = operandType;
2089 operand0.numComponents = numComp;
2090 operand0.selectionMode = selMode;
2091 operand0.mask = usageMask;
2092 operand0.indexDimension = dim;
2093 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
2094 if (dim == VGPU10_OPERAND_INDEX_2D)
2095 operand0.index1Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
2096
2097 name_token.name = name;
2098
2099 emit_decl_instruction(emit, opcode0, operand0, name_token, index, size);
2100 }
2101
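/* Illustrative usage sketch (mirrors the calls made later in this file):
 * declaring a generic fragment-shader input in register 1 with perspective
 * interpolation would look like
 *
 *    emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_PS,
 *                           VGPU10_OPERAND_TYPE_INPUT,
 *                           VGPU10_OPERAND_INDEX_1D, 1, 1,
 *                           VGPU10_NAME_UNDEFINED,
 *                           VGPU10_OPERAND_4_COMPONENT,
 *                           VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
 *                           VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
 *                           VGPU10_INTERPOLATION_LINEAR);
 */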
2102
2103 /**
2104 * Emit the declaration for a shader output.
2105 * \param type one of VGPU10_OPCODE_DCL_OUTPUTx
2106 * \param index the output register index
2107 * \param name one of VGPU10_NAME_x
2108 * \param usageMask bitfield of VGPU10_OPERAND_4_COMPONENT_MASK_x values
2109 */
2110 static void
2111 emit_output_declaration(struct svga_shader_emitter_v10 *emit,
2112 unsigned type, unsigned index,
2113 unsigned name, unsigned usageMask)
2114 {
2115 VGPU10OpcodeToken0 opcode0;
2116 VGPU10OperandToken0 operand0;
2117 VGPU10NameToken name_token;
2118
2119 assert(usageMask <= VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
2120 assert(type == VGPU10_OPCODE_DCL_OUTPUT ||
2121 type == VGPU10_OPCODE_DCL_OUTPUT_SGV ||
2122 type == VGPU10_OPCODE_DCL_OUTPUT_SIV);
2123 assert(name == VGPU10_NAME_UNDEFINED ||
2124 name == VGPU10_NAME_POSITION ||
2125 name == VGPU10_NAME_PRIMITIVE_ID ||
2126 name == VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX ||
2127 name == VGPU10_NAME_CLIP_DISTANCE);
2128
2129 check_register_index(emit, type, index);
2130
2131 opcode0.value = operand0.value = name_token.value = 0;
2132
2133 opcode0.opcodeType = type;
2134 operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT;
2135 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
2136 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
2137 operand0.mask = usageMask;
2138 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
2139 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
2140
2141 name_token.name = name;
2142
2143 emit_decl_instruction(emit, opcode0, operand0, name_token, index, 1);
2144 }
2145
2146
2147 /**
2148 * Emit the declaration for the fragment depth output.
2149 */
2150 static void
2151 emit_fragdepth_output_declaration(struct svga_shader_emitter_v10 *emit)
2152 {
2153 VGPU10OpcodeToken0 opcode0;
2154 VGPU10OperandToken0 operand0;
2155 VGPU10NameToken name_token;
2156
2157 assert(emit->unit == PIPE_SHADER_FRAGMENT);
2158
2159 opcode0.value = operand0.value = name_token.value = 0;
2160
2161 opcode0.opcodeType = VGPU10_OPCODE_DCL_OUTPUT;
2162 operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_DEPTH;
2163 operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
2164 operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
2165 operand0.mask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
2166
2167 emit_decl_instruction(emit, opcode0, operand0, name_token, 0, 1);
2168 }
2169
2170
2171 /**
2172 * Emit the declaration for a system value input/output.
2173 */
2174 static void
2175 emit_system_value_declaration(struct svga_shader_emitter_v10 *emit,
2176 unsigned semantic_name, unsigned index)
2177 {
2178 switch (semantic_name) {
2179 case TGSI_SEMANTIC_INSTANCEID:
2180 index = alloc_system_value_index(emit, index);
2181 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_SIV,
2182 VGPU10_OPERAND_TYPE_INPUT,
2183 VGPU10_OPERAND_INDEX_1D,
2184 index, 1,
2185 VGPU10_NAME_INSTANCE_ID,
2186 VGPU10_OPERAND_4_COMPONENT,
2187 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
2188 VGPU10_OPERAND_4_COMPONENT_MASK_X,
2189 VGPU10_INTERPOLATION_UNDEFINED);
2190 break;
2191 case TGSI_SEMANTIC_VERTEXID:
2192 index = alloc_system_value_index(emit, index);
2193 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_SIV,
2194 VGPU10_OPERAND_TYPE_INPUT,
2195 VGPU10_OPERAND_INDEX_1D,
2196 index, 1,
2197 VGPU10_NAME_VERTEX_ID,
2198 VGPU10_OPERAND_4_COMPONENT,
2199 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
2200 VGPU10_OPERAND_4_COMPONENT_MASK_X,
2201 VGPU10_INTERPOLATION_UNDEFINED);
2202 break;
2203 default:
2204 ; /* XXX */
2205 }
2206 }
2207
2208 /**
2209 * Translate a TGSI declaration to VGPU10.
2210 */
2211 static boolean
2212 emit_vgpu10_declaration(struct svga_shader_emitter_v10 *emit,
2213 const struct tgsi_full_declaration *decl)
2214 {
2215 switch (decl->Declaration.File) {
2216 case TGSI_FILE_INPUT:
2217 /* do nothing - see emit_input_declarations() */
2218 return TRUE;
2219
2220 case TGSI_FILE_OUTPUT:
2221 assert(decl->Range.First == decl->Range.Last);
2222 emit->output_usage_mask[decl->Range.First] = decl->Declaration.UsageMask;
2223 return TRUE;
2224
2225 case TGSI_FILE_TEMPORARY:
2226 /* Don't declare the temps here. Just keep track of how many
2227 * and emit the declaration later.
2228 */
2229 if (decl->Declaration.Array) {
2230 /* Indexed temporary array. Save the start index of the array
2231 * and the size of the array.
2232 */
2233 const unsigned arrayID = MIN2(decl->Array.ArrayID, MAX_TEMP_ARRAYS);
2234 unsigned i;
2235
2236 assert(arrayID < ARRAY_SIZE(emit->temp_arrays));
2237
2238 /* Save this array so we can emit the declaration for it later */
2239 emit->temp_arrays[arrayID].start = decl->Range.First;
2240 emit->temp_arrays[arrayID].size =
2241 decl->Range.Last - decl->Range.First + 1;
2242
2243 emit->num_temp_arrays = MAX2(emit->num_temp_arrays, arrayID + 1);
2244 assert(emit->num_temp_arrays <= MAX_TEMP_ARRAYS);
2245 emit->num_temp_arrays = MIN2(emit->num_temp_arrays, MAX_TEMP_ARRAYS);
2246
2247 /* Fill in the temp_map entries for this array */
2248 for (i = decl->Range.First; i <= decl->Range.Last; i++) {
2249 emit->temp_map[i].arrayId = arrayID;
2250 emit->temp_map[i].index = i - decl->Range.First;
2251 }
2252 }
2253
2254 /* for all temps, indexed or not, keep track of highest index */
2255 emit->num_shader_temps = MAX2(emit->num_shader_temps,
2256 decl->Range.Last + 1);
2257 return TRUE;
2258
2259 case TGSI_FILE_CONSTANT:
2260 /* Don't declare constants here. Just keep track and emit later. */
2261 {
2262 unsigned constbuf = 0, num_consts;
2263 if (decl->Declaration.Dimension) {
2264 constbuf = decl->Dim.Index2D;
2265 }
2266          /* Assert on an out-of-bounds constbuf index: the shader should never
2267           * have linked with a constbuf index out of bounds, so we should not
2268           * reach this point.
2269           */
2270 assert(constbuf < Elements(emit->num_shader_consts));
2271
2272 num_consts = MAX2(emit->num_shader_consts[constbuf],
2273 decl->Range.Last + 1);
2274
2275 if (num_consts > VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT) {
2276 debug_printf("Warning: constant buffer is declared to size [%u]"
2277 " but [%u] is the limit.\n",
2278 num_consts,
2279 VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT);
2280 }
2281 /* The linker doesn't enforce the max UBO size so we clamp here */
2282 emit->num_shader_consts[constbuf] =
2283 MIN2(num_consts, VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT);
2284 }
2285 return TRUE;
2286
2287 case TGSI_FILE_IMMEDIATE:
2288 assert(!"TGSI_FILE_IMMEDIATE not handled yet!");
2289 return FALSE;
2290
2291 case TGSI_FILE_SYSTEM_VALUE:
2292 emit_system_value_declaration(emit, decl->Semantic.Name,
2293 decl->Range.First);
2294 return TRUE;
2295
2296 case TGSI_FILE_SAMPLER:
2297 /* Don't declare samplers here. Just keep track and emit later. */
2298 emit->num_samplers = MAX2(emit->num_samplers, decl->Range.Last + 1);
2299 return TRUE;
2300
2301 #if 0
2302 case TGSI_FILE_RESOURCE:
2303 /*opcode0.opcodeType = VGPU10_OPCODE_DCL_RESOURCE;*/
2304 /* XXX more, VGPU10_RETURN_TYPE_FLOAT */
2305 assert(!"TGSI_FILE_RESOURCE not handled yet");
2306 return FALSE;
2307 #endif
2308
2309 case TGSI_FILE_ADDRESS:
2310 emit->num_address_regs = MAX2(emit->num_address_regs,
2311 decl->Range.Last + 1);
2312 return TRUE;
2313
2314 case TGSI_FILE_SAMPLER_VIEW:
2315 /* Not used at this time, but maybe in the future.
2316 * See emit_resource_declarations().
2317 */
2318 return TRUE;
2319
2320 default:
2321 assert(!"Unexpected type of declaration");
2322 return FALSE;
2323 }
2324 }
2325
2326
2327
2328 /**
2329 * Emit all input declarations.
2330 */
2331 static boolean
2332 emit_input_declarations(struct svga_shader_emitter_v10 *emit)
2333 {
2334 unsigned i;
2335
2336 if (emit->unit == PIPE_SHADER_FRAGMENT) {
2337
2338 for (i = 0; i < emit->linkage.num_inputs; i++) {
2339 unsigned semantic_name = emit->info.input_semantic_name[i];
2340 unsigned usage_mask = emit->info.input_usage_mask[i];
2341 unsigned index = emit->linkage.input_map[i];
2342 unsigned type, interpolationMode, name;
2343
2344 if (usage_mask == 0)
2345 continue; /* register is not actually used */
2346
2347 if (semantic_name == TGSI_SEMANTIC_POSITION) {
2348 /* fragment position input */
2349 type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
2350 interpolationMode = VGPU10_INTERPOLATION_LINEAR;
2351 name = VGPU10_NAME_POSITION;
2352 if (usage_mask & TGSI_WRITEMASK_W) {
2353 /* we need to replace use of 'w' with '1/w' */
2354 emit->fs.fragcoord_input_index = i;
2355 }
2356 }
2357 else if (semantic_name == TGSI_SEMANTIC_FACE) {
2358 /* fragment front-facing input */
2359 type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
2360 interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
2361 name = VGPU10_NAME_IS_FRONT_FACE;
2362 emit->fs.face_input_index = i;
2363 }
2364 else if (semantic_name == TGSI_SEMANTIC_PRIMID) {
2365 /* primitive ID */
2366 type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
2367 interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
2368 name = VGPU10_NAME_PRIMITIVE_ID;
2369 }
2370 else {
2371 /* general fragment input */
2372 type = VGPU10_OPCODE_DCL_INPUT_PS;
2373 interpolationMode =
2374 translate_interpolation(emit,
2375 emit->info.input_interpolate[i],
2376 emit->info.input_interpolate_loc[i]);
2377
2378             /* keep track of whether flat interpolation mode is being used */
2379 emit->uses_flat_interp = emit->uses_flat_interp ||
2380 (interpolationMode == VGPU10_INTERPOLATION_CONSTANT);
2381
2382 name = VGPU10_NAME_UNDEFINED;
2383 }
2384
2385 emit_input_declaration(emit, type,
2386 VGPU10_OPERAND_TYPE_INPUT,
2387 VGPU10_OPERAND_INDEX_1D, index, 1,
2388 name,
2389 VGPU10_OPERAND_4_COMPONENT,
2390 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
2391 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
2392 interpolationMode);
2393 }
2394 }
2395 else if (emit->unit == PIPE_SHADER_GEOMETRY) {
2396
2397 for (i = 0; i < emit->info.num_inputs; i++) {
2398 unsigned semantic_name = emit->info.input_semantic_name[i];
2399 unsigned usage_mask = emit->info.input_usage_mask[i];
2400 unsigned index = emit->linkage.input_map[i];
2401 unsigned opcodeType, operandType;
2402 unsigned numComp, selMode;
2403 unsigned name;
2404 unsigned dim;
2405
2406 if (usage_mask == 0)
2407 continue; /* register is not actually used */
2408
2409 opcodeType = VGPU10_OPCODE_DCL_INPUT;
2410 operandType = VGPU10_OPERAND_TYPE_INPUT;
2411 numComp = VGPU10_OPERAND_4_COMPONENT;
2412 selMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
2413 name = VGPU10_NAME_UNDEFINED;
2414
2415          /* all geometry shader inputs are two-dimensional except gl_PrimitiveID */
2416 dim = VGPU10_OPERAND_INDEX_2D;
2417
2418 if (semantic_name == TGSI_SEMANTIC_PRIMID) {
2419 /* Primitive ID */
2420 operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
2421 dim = VGPU10_OPERAND_INDEX_0D;
2422 numComp = VGPU10_OPERAND_0_COMPONENT;
2423 selMode = 0;
2424
2425             /* Also save the register index so we can check for the
2426              * primitive id when emitting a src register. We need to modify the
2427              * operand type and index dimension when emitting the primitive id src reg.
2428              */
2429 emit->gs.prim_id_index = i;
2430 }
2431 else if (semantic_name == TGSI_SEMANTIC_POSITION) {
2432 /* vertex position input */
2433 opcodeType = VGPU10_OPCODE_DCL_INPUT_SIV;
2434 name = VGPU10_NAME_POSITION;
2435 }
2436
2437 emit_input_declaration(emit, opcodeType, operandType,
2438 dim, index,
2439 emit->gs.input_size,
2440 name,
2441 numComp, selMode,
2442 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
2443 VGPU10_INTERPOLATION_UNDEFINED);
2444 }
2445 }
2446 else {
2447 assert(emit->unit == PIPE_SHADER_VERTEX);
2448
2449 for (i = 0; i < emit->info.num_inputs; i++) {
2450 unsigned usage_mask = emit->info.input_usage_mask[i];
2451 unsigned index = i;
2452
2453 if (usage_mask == 0)
2454 continue; /* register is not actually used */
2455
2456 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
2457 VGPU10_OPERAND_TYPE_INPUT,
2458 VGPU10_OPERAND_INDEX_1D, index, 1,
2459 VGPU10_NAME_UNDEFINED,
2460 VGPU10_OPERAND_4_COMPONENT,
2461 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
2462 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
2463 VGPU10_INTERPOLATION_UNDEFINED);
2464 }
2465 }
2466
2467 return TRUE;
2468 }
2469
2470
2471 /**
2472 * Emit all output declarations.
2473 */
2474 static boolean
2475 emit_output_declarations(struct svga_shader_emitter_v10 *emit)
2476 {
2477 unsigned i;
2478
2479 for (i = 0; i < emit->info.num_outputs; i++) {
2480 /*const unsigned usage_mask = emit->info.output_usage_mask[i];*/
2481 const unsigned semantic_name = emit->info.output_semantic_name[i];
2482 const unsigned semantic_index = emit->info.output_semantic_index[i];
2483 unsigned index = i;
2484
2485 if (emit->unit == PIPE_SHADER_FRAGMENT) {
2486 if (semantic_name == TGSI_SEMANTIC_COLOR) {
2487 assert(semantic_index < Elements(emit->fs.color_out_index));
2488
2489 emit->fs.color_out_index[semantic_index] = index;
2490
2491 /* The semantic index is the shader's color output/buffer index */
2492 emit_output_declaration(emit,
2493 VGPU10_OPCODE_DCL_OUTPUT, semantic_index,
2494 VGPU10_NAME_UNDEFINED,
2495 VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
2496
2497 if (semantic_index == 0) {
2498 if (emit->key.fs.write_color0_to_n_cbufs > 1) {
2499 /* Emit declarations for the additional color outputs
2500 * for broadcasting.
2501 */
2502 unsigned j;
2503 for (j = 1; j < emit->key.fs.write_color0_to_n_cbufs; j++) {
2504 /* Allocate a new output index */
2505 unsigned idx = emit->info.num_outputs + j - 1;
2506 emit->fs.color_out_index[j] = idx;
2507 emit_output_declaration(emit,
2508 VGPU10_OPCODE_DCL_OUTPUT, idx,
2509 VGPU10_NAME_UNDEFINED,
2510 VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
2511 emit->info.output_semantic_index[idx] = j;
2512 }
2513 }
2514 }
2515 else {
2516 assert(!emit->key.fs.write_color0_to_n_cbufs);
2517 }
2518 }
2519 else if (semantic_name == TGSI_SEMANTIC_POSITION) {
2520 /* Fragment depth output */
2521 emit_fragdepth_output_declaration(emit);
2522 }
2523 else {
2524 assert(!"Bad output semantic name");
2525 }
2526 }
2527 else {
2528 /* VS or GS */
2529 unsigned name, type;
2530 unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
2531
2532 switch (semantic_name) {
2533 case TGSI_SEMANTIC_POSITION:
2534 assert(emit->unit != PIPE_SHADER_FRAGMENT);
2535 type = VGPU10_OPCODE_DCL_OUTPUT_SIV;
2536 name = VGPU10_NAME_POSITION;
2537 /* Save the index of the vertex position output register */
2538 emit->vposition.out_index = index;
2539 break;
2540 case TGSI_SEMANTIC_CLIPDIST:
2541 type = VGPU10_OPCODE_DCL_OUTPUT_SIV;
2542 name = VGPU10_NAME_CLIP_DISTANCE;
2543 /* save the starting index of the clip distance output register */
2544 if (semantic_index == 0)
2545 emit->clip_dist_out_index = index;
2546 writemask = emit->output_usage_mask[index];
2547 writemask = apply_clip_plane_mask(emit, writemask, semantic_index);
2548 if (writemask == 0x0) {
2549 continue; /* discard this do-nothing declaration */
2550 }
2551 break;
2552 case TGSI_SEMANTIC_PRIMID:
2553 assert(emit->unit == PIPE_SHADER_GEOMETRY);
2554 type = VGPU10_OPCODE_DCL_OUTPUT_SGV;
2555 name = VGPU10_NAME_PRIMITIVE_ID;
2556 break;
2557 case TGSI_SEMANTIC_LAYER:
2558 assert(emit->unit == PIPE_SHADER_GEOMETRY);
2559 type = VGPU10_OPCODE_DCL_OUTPUT_SGV;
2560 name = VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX;
2561 break;
2562 case TGSI_SEMANTIC_CLIPVERTEX:
2563 type = VGPU10_OPCODE_DCL_OUTPUT;
2564 name = VGPU10_NAME_UNDEFINED;
2565 emit->clip_vertex_out_index = index;
2566 break;
2567 default:
2568 /* generic output */
2569 type = VGPU10_OPCODE_DCL_OUTPUT;
2570 name = VGPU10_NAME_UNDEFINED;
2571 }
2572
2573 emit_output_declaration(emit, type, index, name, writemask);
2574 }
2575 }
2576
2577 if (emit->vposition.so_index != INVALID_INDEX &&
2578 emit->vposition.out_index != INVALID_INDEX) {
2579
2580 assert(emit->unit != PIPE_SHADER_FRAGMENT);
2581
2582 /* Emit the declaration for the non-adjusted vertex position
2583 * for stream output purpose
2584 */
2585 emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT,
2586 emit->vposition.so_index,
2587 VGPU10_NAME_UNDEFINED,
2588 VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
2589 }
2590
2591 if (emit->clip_dist_so_index != INVALID_INDEX &&
2592 emit->clip_dist_out_index != INVALID_INDEX) {
2593
2594 assert(emit->unit != PIPE_SHADER_FRAGMENT);
2595
2596 /* Emit the declaration for the clip distance shadow copy which
2597 * will be used for stream output purpose and for clip distance
2598 * varying variable
2599 */
2600 emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT,
2601 emit->clip_dist_so_index,
2602 VGPU10_NAME_UNDEFINED,
2603 emit->output_usage_mask[emit->clip_dist_out_index]);
2604
2605 if (emit->info.num_written_clipdistance > 4) {
2606 /* for the second clip distance register, each handles 4 planes */
2607 emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT,
2608 emit->clip_dist_so_index + 1,
2609 VGPU10_NAME_UNDEFINED,
2610 emit->output_usage_mask[emit->clip_dist_out_index+1]);
2611 }
2612 }
2613
2614 return TRUE;
2615 }
2616
2617
2618 /**
2619 * Emit the declaration for the temporary registers.
2620 */
2621 static boolean
2622 emit_temporaries_declaration(struct svga_shader_emitter_v10 *emit)
2623 {
2624 unsigned total_temps, reg, i;
2625
2626 total_temps = emit->num_shader_temps;
2627
2628 /* Allocate extra temps for specially-implemented instructions,
2629 * such as LIT.
2630 */
2631 total_temps += MAX_INTERNAL_TEMPS;
2632
2633 if (emit->unit == PIPE_SHADER_VERTEX || emit->unit == PIPE_SHADER_GEOMETRY) {
2634 if (emit->vposition.need_prescale || emit->key.vs.undo_viewport ||
2635 emit->key.clip_plane_enable ||
2636 emit->vposition.so_index != INVALID_INDEX) {
2637 emit->vposition.tmp_index = total_temps;
2638 total_temps += 1;
2639 }
2640
2641 if (emit->unit == PIPE_SHADER_VERTEX) {
2642 unsigned attrib_mask = (emit->key.vs.adjust_attrib_w_1 |
2643 emit->key.vs.adjust_attrib_itof |
2644 emit->key.vs.adjust_attrib_utof |
2645 emit->key.vs.attrib_is_bgra |
2646 emit->key.vs.attrib_puint_to_snorm |
2647 emit->key.vs.attrib_puint_to_uscaled |
2648 emit->key.vs.attrib_puint_to_sscaled);
2649 while (attrib_mask) {
2650 unsigned index = u_bit_scan(&attrib_mask);
2651 emit->vs.adjusted_input[index] = total_temps++;
2652 }
2653 }
2654
2655 if (emit->clip_mode == CLIP_DISTANCE) {
2656 /* We need to write the clip distance to a temporary register
2657 * first. Then it will be copied to the shadow copy for
2658 * the clip distance varying variable and stream output purpose.
2659 * It will also be copied to the actual CLIPDIST register
2660 * according to the enabled clip planes
2661 */
2662 emit->clip_dist_tmp_index = total_temps++;
2663 if (emit->info.num_written_clipdistance > 4)
2664 total_temps++; /* second clip register */
2665 }
2666 else if (emit->clip_mode == CLIP_VERTEX) {
2667 /* We need to convert the TGSI CLIPVERTEX output to one or more
2668 * clip distances. Allocate a temp reg for the clipvertex here.
2669 */
2670 assert(emit->info.writes_clipvertex > 0);
2671 emit->clip_vertex_tmp_index = total_temps;
2672 total_temps++;
2673 }
2674 }
2675 else if (emit->unit == PIPE_SHADER_FRAGMENT) {
2676 if (emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS ||
2677 emit->key.fs.white_fragments ||
2678 emit->key.fs.write_color0_to_n_cbufs > 1) {
2679 /* Allocate a temp to hold the output color */
2680 emit->fs.color_tmp_index = total_temps;
2681 total_temps += 1;
2682 }
2683
2684 if (emit->fs.face_input_index != INVALID_INDEX) {
2685 /* Allocate a temp for the +/-1 face register */
2686 emit->fs.face_tmp_index = total_temps;
2687 total_temps += 1;
2688 }
2689
2690 if (emit->fs.fragcoord_input_index != INVALID_INDEX) {
2691 /* Allocate a temp for modified fragment position register */
2692 emit->fs.fragcoord_tmp_index = total_temps;
2693 total_temps += 1;
2694 }
2695 }
2696
2697 for (i = 0; i < emit->num_address_regs; i++) {
2698 emit->address_reg_index[i] = total_temps++;
2699 }
2700
2701 /* Initialize the temp_map array which maps TGSI temp indexes to VGPU10
2702 * temp indexes. Basically, we compact all the non-array temp register
2703 * indexes into a consecutive series.
2704 *
2705 * Before, we may have some TGSI declarations like:
2706 * DCL TEMP[0..1], LOCAL
2707 * DCL TEMP[2..4], ARRAY(1), LOCAL
2708 * DCL TEMP[5..7], ARRAY(2), LOCAL
2709 * plus, some extra temps, like TEMP[8], TEMP[9] for misc things
2710 *
2711 * After, we'll have a map like this:
2712 * temp_map[0] = { array 0, index 0 }
2713 * temp_map[1] = { array 0, index 1 }
2714 * temp_map[2] = { array 1, index 0 }
2715 * temp_map[3] = { array 1, index 1 }
2716 * temp_map[4] = { array 1, index 2 }
2717 * temp_map[5] = { array 2, index 0 }
2718 * temp_map[6] = { array 2, index 1 }
2719 * temp_map[7] = { array 2, index 2 }
2720 * temp_map[8] = { array 0, index 2 }
2721 * temp_map[9] = { array 0, index 3 }
2722 *
2723 * We'll declare two arrays of 3 elements, plus a set of four non-indexed
2724 * temps numbered 0..3
2725 *
2726 * Any time we emit a temporary register index, we'll have to use the
2727 * temp_map[] table to convert the TGSI index to the VGPU10 index.
2728 *
2729 * Finally, we recompute the total_temps value here.
2730 */
2731 reg = 0;
2732 for (i = 0; i < total_temps; i++) {
2733 if (emit->temp_map[i].arrayId == 0) {
2734 emit->temp_map[i].index = reg++;
2735 }
2736 }
2737 total_temps = reg;
2738
2739 if (0) {
2740 debug_printf("total_temps %u\n", total_temps);
2741 for (i = 0; i < 30; i++) {
2742 debug_printf("temp %u -> array %u index %u\n",
2743 i, emit->temp_map[i].arrayId, emit->temp_map[i].index);
2744 }
2745 }
2746
2747 /* Emit declaration of ordinary temp registers */
2748 if (total_temps > 0) {
2749 VGPU10OpcodeToken0 opcode0;
2750
2751 opcode0.value = 0;
2752 opcode0.opcodeType = VGPU10_OPCODE_DCL_TEMPS;
2753
2754 begin_emit_instruction(emit);
2755 emit_dword(emit, opcode0.value);
2756 emit_dword(emit, total_temps);
2757 end_emit_instruction(emit);
2758 }
2759
2760 /* Emit declarations for indexable temp arrays. Skip 0th entry since
2761 * it's unused.
2762 */
2763 for (i = 1; i < emit->num_temp_arrays; i++) {
2764 unsigned num_temps = emit->temp_arrays[i].size;
2765
2766 if (num_temps > 0) {
2767 VGPU10OpcodeToken0 opcode0;
2768
2769 opcode0.value = 0;
2770 opcode0.opcodeType = VGPU10_OPCODE_DCL_INDEXABLE_TEMP;
2771
2772 begin_emit_instruction(emit);
2773 emit_dword(emit, opcode0.value);
2774 emit_dword(emit, i); /* which array */
2775 emit_dword(emit, num_temps);
2776 emit_dword(emit, 4); /* num components */
2777 end_emit_instruction(emit);
2778
2779 total_temps += num_temps;
2780 }
2781 }
2782
2783 /* Check that the grand total of all regular and indexed temps is
2784 * under the limit.
2785 */
2786 check_register_index(emit, VGPU10_OPCODE_DCL_TEMPS, total_temps - 1);
2787
2788 return TRUE;
2789 }
2790
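/* Sketch of how the map built above is consumed (an assumption -- the
 * operand-emitting code is elsewhere in this file): whenever a TGSI TEMP[i]
 * operand is emitted, the index is translated roughly as
 *
 *    if (emit->temp_map[i].arrayId == 0)
 *       emit an ordinary temp, r<temp_map[i].index>
 *    else
 *       emit an indexable temp, x<arrayId>[temp_map[i].index]
 *
 * which is why only the arrayId == 0 entries are counted toward the
 * dcl_temps total above.
 */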
2791
2792 static boolean
2793 emit_constant_declaration(struct svga_shader_emitter_v10 *emit)
2794 {
2795 VGPU10OpcodeToken0 opcode0;
2796 VGPU10OperandToken0 operand0;
2797 unsigned total_consts, i;
2798
2799 opcode0.value = 0;
2800 opcode0.opcodeType = VGPU10_OPCODE_DCL_CONSTANT_BUFFER;
2801 opcode0.accessPattern = VGPU10_CB_IMMEDIATE_INDEXED;
2802 /* XXX or, access pattern = VGPU10_CB_DYNAMIC_INDEXED */
2803
2804 operand0.value = 0;
2805 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
2806 operand0.indexDimension = VGPU10_OPERAND_INDEX_2D;
2807 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
2808 operand0.index1Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
2809 operand0.operandType = VGPU10_OPERAND_TYPE_CONSTANT_BUFFER;
2810 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
2811 operand0.swizzleX = 0;
2812 operand0.swizzleY = 1;
2813 operand0.swizzleZ = 2;
2814 operand0.swizzleW = 3;
2815
2816 /**
2817 * Emit declaration for constant buffer [0]. We also allocate
2818 * room for the extra constants here.
2819 */
2820 total_consts = emit->num_shader_consts[0];
2821
2822 /* Now, allocate constant slots for the "extra" constants */
2823
2824 /* Vertex position scale/translation */
2825 if (emit->vposition.need_prescale) {
2826 emit->vposition.prescale_scale_index = total_consts++;
2827 emit->vposition.prescale_trans_index = total_consts++;
2828 }
2829
2830 if (emit->unit == PIPE_SHADER_VERTEX) {
2831 if (emit->key.vs.undo_viewport) {
2832 emit->vs.viewport_index = total_consts++;
2833 }
2834 }
2835
2836 /* user-defined clip planes */
2837 if (emit->key.clip_plane_enable) {
2838 unsigned n = util_bitcount(emit->key.clip_plane_enable);
2839 assert(emit->unit == PIPE_SHADER_VERTEX ||
2840 emit->unit == PIPE_SHADER_GEOMETRY);
2841 for (i = 0; i < n; i++) {
2842 emit->clip_plane_const[i] = total_consts++;
2843 }
2844 }
2845
2846 /* Texcoord scale factors for RECT textures */
2847 {
2848 for (i = 0; i < emit->num_samplers; i++) {
2849 if (emit->key.tex[i].unnormalized) {
2850 emit->texcoord_scale_index[i] = total_consts++;
2851 }
2852 }
2853 }
2854
2855 /* Texture buffer sizes */
2856 for (i = 0; i < emit->num_samplers; i++) {
2857 if (emit->key.tex[i].texture_target == PIPE_BUFFER) {
2858 emit->texture_buffer_size_index[i] = total_consts++;
2859 }
2860 }
2861
2862 if (total_consts > 0) {
2863 begin_emit_instruction(emit);
2864 emit_dword(emit, opcode0.value);
2865 emit_dword(emit, operand0.value);
2866 emit_dword(emit, 0); /* which const buffer slot */
2867 emit_dword(emit, total_consts);
2868 end_emit_instruction(emit);
2869 }
2870
2871 /* Declare remaining constant buffers (UBOs) */
2872 for (i = 1; i < Elements(emit->num_shader_consts); i++) {
2873 if (emit->num_shader_consts[i] > 0) {
2874 begin_emit_instruction(emit);
2875 emit_dword(emit, opcode0.value);
2876 emit_dword(emit, operand0.value);
2877 emit_dword(emit, i); /* which const buffer slot */
2878 emit_dword(emit, emit->num_shader_consts[i]);
2879 end_emit_instruction(emit);
2880 }
2881 }
2882
2883 return TRUE;
2884 }
2885
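/* Illustrative layout sketch (not from the original source): if a vertex
 * shader declares 8 constants in buffer 0, needs the position prescale
 * factors and has two user clip planes enabled, the slots allocated above
 * end up as
 *
 *    c[0..7]    constants from the TGSI declaration
 *    c[8]       prescale scale      (vposition.prescale_scale_index)
 *    c[9]       prescale translate  (vposition.prescale_trans_index)
 *    c[10..11]  clip plane coefficients (clip_plane_const[0..1])
 *
 * and the dcl_constantBuffer for slot 0 is emitted with a size of 12.
 */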
2886
2887 /**
2888 * Emit declarations for samplers.
2889 */
2890 static boolean
2891 emit_sampler_declarations(struct svga_shader_emitter_v10 *emit)
2892 {
2893 unsigned i;
2894
2895 for (i = 0; i < emit->num_samplers; i++) {
2896 VGPU10OpcodeToken0 opcode0;
2897 VGPU10OperandToken0 operand0;
2898
2899 opcode0.value = 0;
2900 opcode0.opcodeType = VGPU10_OPCODE_DCL_SAMPLER;
2901 opcode0.samplerMode = VGPU10_SAMPLER_MODE_DEFAULT;
2902
2903 operand0.value = 0;
2904 operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
2905 operand0.operandType = VGPU10_OPERAND_TYPE_SAMPLER;
2906 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
2907 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
2908
2909 begin_emit_instruction(emit);
2910 emit_dword(emit, opcode0.value);
2911 emit_dword(emit, operand0.value);
2912 emit_dword(emit, i);
2913 end_emit_instruction(emit);
2914 }
2915
2916 return TRUE;
2917 }
2918
2919
2920 /**
2921  * Translate PIPE_TEXTURE_x to VGPU10_RESOURCE_DIMENSION_x.
2922 */
2923 static unsigned
2924 pipe_texture_to_resource_dimension(unsigned target, bool msaa)
2925 {
2926 switch (target) {
2927 case PIPE_BUFFER:
2928 return VGPU10_RESOURCE_DIMENSION_BUFFER;
2929 case PIPE_TEXTURE_1D:
2930 return VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
2931 case PIPE_TEXTURE_2D:
2932 case PIPE_TEXTURE_RECT:
2933 return msaa ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS
2934 : VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
2935 case PIPE_TEXTURE_3D:
2936 return VGPU10_RESOURCE_DIMENSION_TEXTURE3D;
2937 case PIPE_TEXTURE_CUBE:
2938 return VGPU10_RESOURCE_DIMENSION_TEXTURECUBE;
2939 case PIPE_TEXTURE_1D_ARRAY:
2940 return VGPU10_RESOURCE_DIMENSION_TEXTURE1DARRAY;
2941 case PIPE_TEXTURE_2D_ARRAY:
2942 return msaa ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DMSARRAY
2943 : VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY;
2944 case PIPE_TEXTURE_CUBE_ARRAY:
2945 return VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY;
2946 default:
2947 assert(!"Unexpected resource type");
2948 return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
2949 }
2950 }
2951
2952
2953 /**
2954 * Given a tgsi_return_type, return true iff it is an integer type.
2955 */
2956 static boolean
2957 is_integer_type(enum tgsi_return_type type)
2958 {
2959 switch (type) {
2960 case TGSI_RETURN_TYPE_SINT:
2961 case TGSI_RETURN_TYPE_UINT:
2962 return TRUE;
2963 case TGSI_RETURN_TYPE_FLOAT:
2964 case TGSI_RETURN_TYPE_UNORM:
2965 case TGSI_RETURN_TYPE_SNORM:
2966 return FALSE;
2967 case TGSI_RETURN_TYPE_COUNT:
2968 default:
2969 assert(!"is_integer_type: Unknown tgsi_return_type");
2970 return FALSE;
2971 }
2972 }
2973
2974
2975 /**
2976 * Emit declarations for resources.
2977 * XXX When we're sure that all TGSI shaders will be generated with
2978 * sampler view declarations (Ex: DCL SVIEW[n], 2D, UINT) we may
2979 * rework this code.
2980 */
2981 static boolean
2982 emit_resource_declarations(struct svga_shader_emitter_v10 *emit)
2983 {
2984 unsigned i;
2985
2986 /* Emit resource decl for each sampler */
2987 for (i = 0; i < emit->num_samplers; i++) {
2988 VGPU10OpcodeToken0 opcode0;
2989 VGPU10OperandToken0 operand0;
2990 VGPU10ResourceReturnTypeToken return_type;
2991 VGPU10_RESOURCE_RETURN_TYPE rt;
2992
2993 opcode0.value = 0;
2994 opcode0.opcodeType = VGPU10_OPCODE_DCL_RESOURCE;
2995 opcode0.resourceDimension =
2996 pipe_texture_to_resource_dimension(emit->key.tex[i].texture_target,
2997 emit->key.tex[i].texture_msaa);
2998 operand0.value = 0;
2999 operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
3000 operand0.operandType = VGPU10_OPERAND_TYPE_RESOURCE;
3001 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
3002 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
3003
3004 #if 1
3005 /* convert TGSI_RETURN_TYPE_x to VGPU10_RETURN_TYPE_x */
3006 STATIC_ASSERT(VGPU10_RETURN_TYPE_UNORM == TGSI_RETURN_TYPE_UNORM + 1);
3007 STATIC_ASSERT(VGPU10_RETURN_TYPE_SNORM == TGSI_RETURN_TYPE_SNORM + 1);
3008 STATIC_ASSERT(VGPU10_RETURN_TYPE_SINT == TGSI_RETURN_TYPE_SINT + 1);
3009 STATIC_ASSERT(VGPU10_RETURN_TYPE_UINT == TGSI_RETURN_TYPE_UINT + 1);
3010 STATIC_ASSERT(VGPU10_RETURN_TYPE_FLOAT == TGSI_RETURN_TYPE_FLOAT + 1);
3011 assert(emit->key.tex[i].return_type <= TGSI_RETURN_TYPE_FLOAT);
3012 rt = emit->key.tex[i].return_type + 1;
3013 #else
3014 switch (emit->key.tex[i].return_type) {
3015 case TGSI_RETURN_TYPE_UNORM: rt = VGPU10_RETURN_TYPE_UNORM; break;
3016 case TGSI_RETURN_TYPE_SNORM: rt = VGPU10_RETURN_TYPE_SNORM; break;
3017 case TGSI_RETURN_TYPE_SINT: rt = VGPU10_RETURN_TYPE_SINT; break;
3018 case TGSI_RETURN_TYPE_UINT: rt = VGPU10_RETURN_TYPE_UINT; break;
3019 case TGSI_RETURN_TYPE_FLOAT: rt = VGPU10_RETURN_TYPE_FLOAT; break;
3020 case TGSI_RETURN_TYPE_COUNT:
3021 default:
3022 rt = VGPU10_RETURN_TYPE_FLOAT;
3023 assert(!"emit_resource_declarations: Unknown tgsi_return_type");
3024 }
3025 #endif
3026
3027 return_type.value = 0;
3028 return_type.component0 = rt;
3029 return_type.component1 = rt;
3030 return_type.component2 = rt;
3031 return_type.component3 = rt;
3032
3033 begin_emit_instruction(emit);
3034 emit_dword(emit, opcode0.value);
3035 emit_dword(emit, operand0.value);
3036 emit_dword(emit, i);
3037 emit_dword(emit, return_type.value);
3038 end_emit_instruction(emit);
3039 }
3040
3041 return TRUE;
3042 }
3043
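/**
 * Emit a single instruction with one destination operand, one source
 * operand and an optional saturate modifier.
 */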
3044 static void
3045 emit_instruction_op1(struct svga_shader_emitter_v10 *emit,
3046 unsigned opcode,
3047 const struct tgsi_full_dst_register *dst,
3048 const struct tgsi_full_src_register *src,
3049 boolean saturate)
3050 {
3051 begin_emit_instruction(emit);
3052 emit_opcode(emit, opcode, saturate);
3053 emit_dst_register(emit, dst);
3054 emit_src_register(emit, src);
3055 end_emit_instruction(emit);
3056 }
3057
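/**
 * Emit a single instruction with one destination operand, two source
 * operands and an optional saturate modifier.
 */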
3058 static void
3059 emit_instruction_op2(struct svga_shader_emitter_v10 *emit,
3060 unsigned opcode,
3061 const struct tgsi_full_dst_register *dst,
3062 const struct tgsi_full_src_register *src1,
3063 const struct tgsi_full_src_register *src2,
3064 boolean saturate)
3065 {
3066 begin_emit_instruction(emit);
3067 emit_opcode(emit, opcode, saturate);
3068 emit_dst_register(emit, dst);
3069 emit_src_register(emit, src1);
3070 emit_src_register(emit, src2);
3071 end_emit_instruction(emit);
3072 }
3073
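/**
 * Emit a single instruction with one destination operand, three source
 * operands and an optional saturate modifier.
 */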
3074 static void
3075 emit_instruction_op3(struct svga_shader_emitter_v10 *emit,
3076 unsigned opcode,
3077 const struct tgsi_full_dst_register *dst,
3078 const struct tgsi_full_src_register *src1,
3079 const struct tgsi_full_src_register *src2,
3080 const struct tgsi_full_src_register *src3,
3081 boolean saturate)
3082 {
3083 begin_emit_instruction(emit);
3084 emit_opcode(emit, opcode, saturate);
3085 emit_dst_register(emit, dst);
3086 emit_src_register(emit, src1);
3087 emit_src_register(emit, src2);
3088 emit_src_register(emit, src3);
3089 end_emit_instruction(emit);
3090 }
3091
3092 /**
3093  * Emit the clip distance instructions used for clipping by copying the
3094  * clip distances from the temporary registers to the CLIPDIST registers,
3095  * using the enabled clip-planes mask as the write mask.
3096  * Also copy the clip distances from the temporary to the clip distance
3097  * shadow copy register, which will be referenced by the input shader.
3098 */
3099 static void
3100 emit_clip_distance_instructions(struct svga_shader_emitter_v10 *emit)
3101 {
3102 struct tgsi_full_src_register tmp_clip_dist_src;
3103 struct tgsi_full_dst_register clip_dist_dst;
3104
3105 unsigned i;
3106 unsigned clip_plane_enable = emit->key.clip_plane_enable;
3107 unsigned clip_dist_tmp_index = emit->clip_dist_tmp_index;
3108 int num_written_clipdist = emit->info.num_written_clipdistance;
3109
3110 assert(emit->clip_dist_out_index != INVALID_INDEX);
3111 assert(emit->clip_dist_tmp_index != INVALID_INDEX);
3112
3113 /**
3114     * Temporarily reset the temporary clip dist register index so
3115     * that the copy to the real clip dist register will not
3116     * attempt to copy to the temporary register again.
3117 */
3118 emit->clip_dist_tmp_index = INVALID_INDEX;
3119
3120 for (i = 0; i < 2 && num_written_clipdist > 0; i++, num_written_clipdist-=4) {
3121
3122 tmp_clip_dist_src = make_src_temp_reg(clip_dist_tmp_index + i);
3123
3124 /**
3125        * Copy to the shadow copy for use by the varying variable and
3126        * stream output. All clip distances are written regardless of
3127        * which clip planes are enabled.
3128 */
3129 clip_dist_dst = make_dst_reg(TGSI_FILE_OUTPUT,
3130 emit->clip_dist_so_index + i);
3131
3132 /* MOV clip_dist_so, tmp_clip_dist */
3133 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &clip_dist_dst,
3134 &tmp_clip_dist_src, FALSE);
3135
3136 /**
3137        * Copy the clip distances for the enabled clip planes to
3138        * the CLIPDIST registers used for clipping.
3139 */
3140 if (clip_plane_enable & 0xf) {
3141 clip_dist_dst = make_dst_reg(TGSI_FILE_OUTPUT,
3142 emit->clip_dist_out_index + i);
3143 clip_dist_dst = writemask_dst(&clip_dist_dst, clip_plane_enable & 0xf);
3144
3145 /* MOV CLIPDIST, tmp_clip_dist */
3146 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &clip_dist_dst,
3147 &tmp_clip_dist_src, FALSE);
3148 }
3149 /* four clip planes per clip register */
3150 clip_plane_enable >>= 4;
3151 }
3152 /**
3153 * set the temporary clip dist register index back to the
3154 * temporary index for the next vertex
3155 */
3156 emit->clip_dist_tmp_index = clip_dist_tmp_index;
3157 }
3158
3159 /* Declare clip distance output registers for user-defined clip planes
3160 * or the TGSI_CLIPVERTEX output.
3161 */
3162 static void
3163 emit_clip_distance_declarations(struct svga_shader_emitter_v10 *emit)
3164 {
3165 unsigned num_clip_planes = util_bitcount(emit->key.clip_plane_enable);
3166 unsigned index = emit->num_outputs;
3167 unsigned plane_mask;
3168
3169 assert(emit->unit == PIPE_SHADER_VERTEX ||
3170 emit->unit == PIPE_SHADER_GEOMETRY);
3171 assert(num_clip_planes <= 8);
3172
3173 if (emit->clip_mode != CLIP_LEGACY &&
3174 emit->clip_mode != CLIP_VERTEX) {
3175 return;
3176 }
3177
3178 if (num_clip_planes == 0)
3179 return;
3180
3181 /* Declare one or two clip output registers. The number of components
3182 * in the mask reflects the number of clip planes. For example, if 5
3183 * clip planes are needed, we'll declare outputs similar to:
3184 * dcl_output_siv o2.xyzw, clip_distance
3185 * dcl_output_siv o3.x, clip_distance
3186 */
3187 emit->clip_dist_out_index = index; /* save the starting clip dist reg index */
3188
3189 plane_mask = (1 << num_clip_planes) - 1;
3190 if (plane_mask & 0xf) {
3191 unsigned cmask = plane_mask & VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
3192 emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV, index,
3193 VGPU10_NAME_CLIP_DISTANCE, cmask);
3194 emit->num_outputs++;
3195 }
3196 if (plane_mask & 0xf0) {
3197 unsigned cmask = (plane_mask >> 4) & VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
3198 emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV, index + 1,
3199 VGPU10_NAME_CLIP_DISTANCE, cmask);
3200 emit->num_outputs++;
3201 }
3202 }
3203
3204
3205 /**
3206 * Emit the instructions for writing to the clip distance registers
3207 * to handle legacy/automatic clip planes.
3208 * For each clip plane, the distance is the dot product of the vertex
3209 * position (found in TEMP[vpos_tmp_index]) and the clip plane coefficients.
3210  * This is not used when the shader already declares explicit CLIPVERTEX
3211  * or CLIPDISTANCE output registers.
3212 */
3213 static void
3214 emit_clip_distance_from_vpos(struct svga_shader_emitter_v10 *emit,
3215 unsigned vpos_tmp_index)
3216 {
3217 unsigned i, num_clip_planes = util_bitcount(emit->key.clip_plane_enable);
3218
3219 assert(emit->clip_mode == CLIP_LEGACY);
3220 assert(num_clip_planes <= 8);
3221
3222 assert(emit->unit == PIPE_SHADER_VERTEX ||
3223 emit->unit == PIPE_SHADER_GEOMETRY);
3224
3225 for (i = 0; i < num_clip_planes; i++) {
3226 struct tgsi_full_dst_register dst;
3227 struct tgsi_full_src_register plane_src, vpos_src;
3228 unsigned reg_index = emit->clip_dist_out_index + i / 4;
3229 unsigned comp = i % 4;
3230 unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_X << comp;
3231
3232 /* create dst, src regs */
3233 dst = make_dst_reg(TGSI_FILE_OUTPUT, reg_index);
3234 dst = writemask_dst(&dst, writemask);
3235
3236 plane_src = make_src_const_reg(emit->clip_plane_const[i]);
3237 vpos_src = make_src_temp_reg(vpos_tmp_index);
3238
3239 /* DP4 clip_dist, plane, vpos */
3240 emit_instruction_op2(emit, VGPU10_OPCODE_DP4, &dst,
3241 &plane_src, &vpos_src, FALSE);
3242 }
3243 }
3244
3245
3246 /**
3247 * Emit the instructions for computing the clip distance results from
3248 * the clip vertex temporary.
3249 * For each clip plane, the distance is the dot product of the clip vertex
3250 * position (found in a temp reg) and the clip plane coefficients.
3251 */
3252 static void
3253 emit_clip_vertex_instructions(struct svga_shader_emitter_v10 *emit)
3254 {
3255 const unsigned num_clip = util_bitcount(emit->key.clip_plane_enable);
3256 unsigned i;
3257 struct tgsi_full_dst_register dst;
3258 struct tgsi_full_src_register clipvert_src;
3259 const unsigned clip_vertex_tmp = emit->clip_vertex_tmp_index;
3260
3261 assert(emit->unit == PIPE_SHADER_VERTEX ||
3262 emit->unit == PIPE_SHADER_GEOMETRY);
3263
3264 assert(emit->clip_mode == CLIP_VERTEX);
3265
3266 clipvert_src = make_src_temp_reg(clip_vertex_tmp);
3267
3268 for (i = 0; i < num_clip; i++) {
3269 struct tgsi_full_src_register plane_src;
3270 unsigned reg_index = emit->clip_dist_out_index + i / 4;
3271 unsigned comp = i % 4;
3272 unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_X << comp;
3273
3274 /* create dst, src regs */
3275 dst = make_dst_reg(TGSI_FILE_OUTPUT, reg_index);
3276 dst = writemask_dst(&dst, writemask);
3277
3278 plane_src = make_src_const_reg(emit->clip_plane_const[i]);
3279
3280 /* DP4 clip_dist, plane, vpos */
3281 emit_instruction_op2(emit, VGPU10_OPCODE_DP4, &dst,
3282 &plane_src, &clipvert_src, FALSE);
3283 }
3284
3285 /* copy temporary clip vertex register to the clip vertex register */
3286
3287 assert(emit->clip_vertex_out_index != INVALID_INDEX);
3288
3289 /**
3290     * Temporarily reset the temporary clip vertex register index so
3291     * that the copy to the clip vertex register will not attempt
3292     * to copy to the temporary register again.
3293 */
3294 emit->clip_vertex_tmp_index = INVALID_INDEX;
3295
3296 /* MOV clip_vertex, clip_vertex_tmp */
3297 dst = make_dst_reg(TGSI_FILE_OUTPUT, emit->clip_vertex_out_index);
3298 emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
3299 &dst, &clipvert_src, FALSE);
3300
3301 /**
3302 * set the temporary clip vertex register index back to the
3303 * temporary index for the next vertex
3304 */
3305 emit->clip_vertex_tmp_index = clip_vertex_tmp;
3306 }
3307
3308 /**
3309 * Emit code to convert RGBA to BGRA
3310 */
3311 static void
3312 emit_swap_r_b(struct svga_shader_emitter_v10 *emit,
3313 const struct tgsi_full_dst_register *dst,
3314 const struct tgsi_full_src_register *src)
3315 {
3316 struct tgsi_full_src_register bgra_src =
3317 swizzle_src(src, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_X, TGSI_SWIZZLE_W);
3318
3319 begin_emit_instruction(emit);
3320 emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE);
3321 emit_dst_register(emit, dst);
3322 emit_src_register(emit, &bgra_src);
3323 end_emit_instruction(emit);
3324 }
3325
3326
3327 /** Convert from 10_10_10_2 normalized to 10_10_10_2_snorm */
3328 static void
3329 emit_puint_to_snorm(struct svga_shader_emitter_v10 *emit,
3330 const struct tgsi_full_dst_register *dst,
3331 const struct tgsi_full_src_register *src)
3332 {
3333 struct tgsi_full_src_register half = make_immediate_reg_float(emit, 0.5f);
3334 struct tgsi_full_src_register two =
3335 make_immediate_reg_float4(emit, 2.0f, 2.0f, 2.0f, 3.0f);
3336 struct tgsi_full_src_register neg_two =
3337 make_immediate_reg_float4(emit, -2.0f, -2.0f, -2.0f, -1.66666f);
3338
3339 unsigned val_tmp = get_temp_index(emit);
3340 struct tgsi_full_dst_register val_dst = make_dst_temp_reg(val_tmp);
3341 struct tgsi_full_src_register val_src = make_src_temp_reg(val_tmp);
3342
3343 unsigned bias_tmp = get_temp_index(emit);
3344 struct tgsi_full_dst_register bias_dst = make_dst_temp_reg(bias_tmp);
3345 struct tgsi_full_src_register bias_src = make_src_temp_reg(bias_tmp);
3346
3347 /* val = src * 2.0 */
3348 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &val_dst,
3349 src, &two, FALSE);
3350
3351     /* bias = src >= 0.5 */
3352 emit_instruction_op2(emit, VGPU10_OPCODE_GE, &bias_dst,
3353 src, &half, FALSE);
3354
3355 /* bias = bias & -2.0 */
3356 emit_instruction_op2(emit, VGPU10_OPCODE_AND, &bias_dst,
3357 &bias_src, &neg_two, FALSE);
3358
3359 /* dst = val + bias */
3360 emit_instruction_op2(emit, VGPU10_OPCODE_ADD, dst,
3361 &val_src, &bias_src, FALSE);
3362
3363 free_temp_indexes(emit);
3364 }
3365
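/* Worked example (illustrative, not from the original source): for a
 * component read back as unorm 0.75 the code above computes
 * val = 0.75 * 2.0 = 1.5; the GE test against 0.5 passes, so the AND leaves
 * bias = -2.0 and dst = 1.5 - 2.0 = -0.5.  For unorm 0.25 the GE test fails,
 * bias = 0.0 and dst = 0.5.  The w component uses the 3.0 / -1.66666
 * constants because the alpha field is only two bits wide.
 */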
3366
3367 /** Convert from 10_10_10_2_unorm to 10_10_10_2_uscaled */
3368 static void
3369 emit_puint_to_uscaled(struct svga_shader_emitter_v10 *emit,
3370 const struct tgsi_full_dst_register *dst,
3371 const struct tgsi_full_src_register *src)
3372 {
3373 struct tgsi_full_src_register scale =
3374 make_immediate_reg_float4(emit, 1023.0f, 1023.0f, 1023.0f, 3.0f);
3375
3376 /* dst = src * scale */
3377 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, dst, src, &scale, FALSE);
3378 }
3379
3380
3381 /** Convert from R32_UINT to 10_10_10_2_sscaled */
3382 static void
3383 emit_puint_to_sscaled(struct svga_shader_emitter_v10 *emit,
3384 const struct tgsi_full_dst_register *dst,
3385 const struct tgsi_full_src_register *src)
3386 {
3387 struct tgsi_full_src_register lshift =
3388 make_immediate_reg_int4(emit, 22, 12, 2, 0);
3389 struct tgsi_full_src_register rshift =
3390 make_immediate_reg_int4(emit, 22, 22, 22, 30);
3391
3392 struct tgsi_full_src_register src_xxxx = scalar_src(src, TGSI_SWIZZLE_X);
3393
3394 unsigned tmp = get_temp_index(emit);
3395 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
3396 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
3397
3398 /*
3399     * r = (pixel << 22) >> 22;   # signed int in [-512, 511]
3400     * g = (pixel << 12) >> 22;   # signed int in [-512, 511]
3401     * b = (pixel <<  2) >> 22;   # signed int in [-512, 511]
3402     * a = (pixel <<  0) >> 30;   # signed int in [-2, 1]
3403 * dst = i_to_f(r,g,b,a); # convert to float
3404 */
3405 emit_instruction_op2(emit, VGPU10_OPCODE_ISHL, &tmp_dst,
3406 &src_xxxx, &lshift, FALSE);
3407 emit_instruction_op2(emit, VGPU10_OPCODE_ISHR, &tmp_dst,
3408 &tmp_src, &rshift, FALSE);
3409 emit_instruction_op1(emit, VGPU10_OPCODE_ITOF, dst, &tmp_src, FALSE);
3410
3411 free_temp_indexes(emit);
3412 }
3413
3414
3415 /**
3416 * Emit code for TGSI_OPCODE_ABS instruction.
3417 */
3418 static boolean
3419 emit_abs(struct svga_shader_emitter_v10 *emit,
3420 const struct tgsi_full_instruction *inst)
3421 {
3422     /* dst = ABS(s0)
3423      *
3424      * Translates into:
3425      *   MOV dst, abs(s0)
3426 */
3427 struct tgsi_full_src_register abs_src0 = absolute_src(&inst->Src[0]);
3428
3429 /* MOV dst, abs(s0) */
3430 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
3431 &abs_src0, inst->Instruction.Saturate);
3432
3433 return TRUE;
3434 }
3435
3436
3437 /**
3438 * Emit code for TGSI_OPCODE_ARL or TGSI_OPCODE_UARL instruction.
3439 */
3440 static boolean
3441 emit_arl_uarl(struct svga_shader_emitter_v10 *emit,
3442 const struct tgsi_full_instruction *inst)
3443 {
3444 unsigned index = inst->Dst[0].Register.Index;
3445 struct tgsi_full_dst_register dst;
3446 unsigned opcode;
3447
3448 assert(index < MAX_VGPU10_ADDR_REGS);
3449 dst = make_dst_temp_reg(emit->address_reg_index[index]);
3450
3451 /* ARL dst, s0
3452 * Translates into:
3453 * FTOI address_tmp, s0
3454 *
3455 * UARL dst, s0
3456 * Translates into:
3457 * MOV address_tmp, s0
3458 */
3459 if (inst->Instruction.Opcode == TGSI_OPCODE_ARL)
3460 opcode = VGPU10_OPCODE_FTOI;
3461 else
3462 opcode = VGPU10_OPCODE_MOV;
3463
3464 emit_instruction_op1(emit, opcode, &dst, &inst->Src[0], FALSE);
3465
3466 return TRUE;
3467 }
3468
3469
3470 /**
3471 * Emit code for TGSI_OPCODE_CAL instruction.
3472 */
3473 static boolean
3474 emit_cal(struct svga_shader_emitter_v10 *emit,
3475 const struct tgsi_full_instruction *inst)
3476 {
3477 unsigned label = inst->Label.Label;
3478 VGPU10OperandToken0 operand;
3479 operand.value = 0;
3480 operand.operandType = VGPU10_OPERAND_TYPE_LABEL;
3481
3482 begin_emit_instruction(emit);
3483 emit_dword(emit, operand.value);
3484 emit_dword(emit, label);
3485 end_emit_instruction(emit);
3486
3487 return TRUE;
3488 }
3489
3490
3491 /**
3492 * Emit code for TGSI_OPCODE_IABS instruction.
3493 */
3494 static boolean
3495 emit_iabs(struct svga_shader_emitter_v10 *emit,
3496 const struct tgsi_full_instruction *inst)
3497 {
3498 /* dst.x = (src0.x < 0) ? -src0.x : src0.x
3499 * dst.y = (src0.y < 0) ? -src0.y : src0.y
3500 * dst.z = (src0.z < 0) ? -src0.z : src0.z
3501 * dst.w = (src0.w < 0) ? -src0.w : src0.w
3502 *
3503 * Translates into
3504 * IMAX dst, src, neg(src)
3505 */
3506 struct tgsi_full_src_register neg_src = negate_src(&inst->Src[0]);
3507 emit_instruction_op2(emit, VGPU10_OPCODE_IMAX, &inst->Dst[0],
3508 &inst->Src[0], &neg_src, FALSE);
3509
3510 return TRUE;
3511 }
3512
3513
3514 /**
3515 * Emit code for TGSI_OPCODE_CMP instruction.
3516 */
3517 static boolean
3518 emit_cmp(struct svga_shader_emitter_v10 *emit,
3519 const struct tgsi_full_instruction *inst)
3520 {
3521 /* dst.x = (src0.x < 0) ? src1.x : src2.x
3522 * dst.y = (src0.y < 0) ? src1.y : src2.y
3523 * dst.z = (src0.z < 0) ? src1.z : src2.z
3524 * dst.w = (src0.w < 0) ? src1.w : src2.w
3525 *
3526 * Translates into
3527 * LT tmp, src0, 0.0
3528 * MOVC dst, tmp, src1, src2
3529 */
3530 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
3531 unsigned tmp = get_temp_index(emit);
3532 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
3533 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
3534
3535 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst,
3536 &inst->Src[0], &zero, FALSE);
3537 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0],
3538 &tmp_src, &inst->Src[1], &inst->Src[2],
3539 inst->Instruction.Saturate);
3540
3541 free_temp_indexes(emit);
3542
3543 return TRUE;
3544 }
3545
3546
3547 /**
3548 * Emit code for TGSI_OPCODE_DP2A instruction.
3549 */
3550 static boolean
3551 emit_dp2a(struct svga_shader_emitter_v10 *emit,
3552 const struct tgsi_full_instruction *inst)
3553 {
3554 /* dst.x = src0.x * src1.x + src0.y * src1.y + src2.x
3555 * dst.y = src0.x * src1.x + src0.y * src1.y + src2.x
3556 * dst.z = src0.x * src1.x + src0.y * src1.y + src2.x
3557 * dst.w = src0.x * src1.x + src0.y * src1.y + src2.x
3558 * Translates into:
3559 * MAD tmp.x, s0.y, s1.y, s2.x
3560 * MAD tmp.x, s0.x, s1.x, tmp.x
3561 * MOV dst.xyzw, tmp.xxxx
3562 */
3563 unsigned tmp = get_temp_index(emit);
3564 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
3565 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
3566
3567 struct tgsi_full_src_register tmp_src_xxxx =
3568 scalar_src(&tmp_src, TGSI_SWIZZLE_X);
3569 struct tgsi_full_dst_register tmp_dst_x =
3570 writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
3571
3572 struct tgsi_full_src_register src0_xxxx =
3573 scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
3574 struct tgsi_full_src_register src0_yyyy =
3575 scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y);
3576 struct tgsi_full_src_register src1_xxxx =
3577 scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
3578 struct tgsi_full_src_register src1_yyyy =
3579 scalar_src(&inst->Src[1], TGSI_SWIZZLE_Y);
3580 struct tgsi_full_src_register src2_xxxx =
3581 scalar_src(&inst->Src[2], TGSI_SWIZZLE_X);
3582
3583 emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &tmp_dst_x, &src0_yyyy,
3584 &src1_yyyy, &src2_xxxx, FALSE);
3585 emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &tmp_dst_x, &src0_xxxx,
3586 &src1_xxxx, &tmp_src_xxxx, FALSE);
3587 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
3588 &tmp_src_xxxx, inst->Instruction.Saturate);
3589
3590 free_temp_indexes(emit);
3591
3592 return TRUE;
3593 }
3594
3595
3596 /**
3597 * Emit code for TGSI_OPCODE_DPH instruction.
3598 */
3599 static boolean
3600 emit_dph(struct svga_shader_emitter_v10 *emit,
3601 const struct tgsi_full_instruction *inst)
3602 {
3603 /*
3604 * DP3 tmp, s0, s1
3605 * ADD dst, tmp, s1.wwww
3606 */
3607
3608 struct tgsi_full_src_register s1_wwww =
3609 swizzle_src(&inst->Src[1], TGSI_SWIZZLE_W, TGSI_SWIZZLE_W,
3610 TGSI_SWIZZLE_W, TGSI_SWIZZLE_W);
3611
3612 unsigned tmp = get_temp_index(emit);
3613 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
3614 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
3615
3616 /* DP3 tmp, s0, s1 */
3617 emit_instruction_op2(emit, VGPU10_OPCODE_DP3, &tmp_dst, &inst->Src[0],
3618 &inst->Src[1], FALSE);
3619
3620 /* ADD dst, tmp, s1.wwww */
3621 emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &inst->Dst[0], &tmp_src,
3622 &s1_wwww, inst->Instruction.Saturate);
3623
3624 free_temp_indexes(emit);
3625
3626 return TRUE;
3627 }
3628
3629
3630 /**
3631 * Emit code for TGSI_OPCODE_DST instruction.
3632 */
3633 static boolean
3634 emit_dst(struct svga_shader_emitter_v10 *emit,
3635 const struct tgsi_full_instruction *inst)
3636 {
3637 /*
3638 * dst.x = 1
3639 * dst.y = src0.y * src1.y
3640 * dst.z = src0.z
3641 * dst.w = src1.w
3642 */
3643
3644 struct tgsi_full_src_register s0_yyyy =
3645 scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y);
3646 struct tgsi_full_src_register s0_zzzz =
3647 scalar_src(&inst->Src[0], TGSI_SWIZZLE_Z);
3648 struct tgsi_full_src_register s1_yyyy =
3649 scalar_src(&inst->Src[1], TGSI_SWIZZLE_Y);
3650 struct tgsi_full_src_register s1_wwww =
3651 scalar_src(&inst->Src[1], TGSI_SWIZZLE_W);
3652
3653 /*
3654 * If dst is the same register as src0 or src1 we could clobber a source
3655 * before we're done with it, so write to a temporary and insert an extra move.
3656 */
3657 unsigned tmp_move = get_temp_index(emit);
3658 struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move);
3659 struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move);
3660
3661 /* MOV dst.x, 1.0 */
3662 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
3663 struct tgsi_full_dst_register dst_x =
3664 writemask_dst(&move_dst, TGSI_WRITEMASK_X);
3665 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
3666
3667 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &one, FALSE);
3668 }
3669
3670 /* MUL dst.y, s0.y, s1.y */
3671 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
3672 struct tgsi_full_dst_register dst_y =
3673 writemask_dst(&move_dst, TGSI_WRITEMASK_Y);
3674
3675 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &dst_y, &s0_yyyy,
3676 &s1_yyyy, inst->Instruction.Saturate);
3677 }
3678
3679 /* MOV dst.z, s0.z */
3680 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
3681 struct tgsi_full_dst_register dst_z =
3682 writemask_dst(&move_dst, TGSI_WRITEMASK_Z);
3683
3684 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_z, &s0_zzzz,
3685 inst->Instruction.Saturate);
3686 }
3687
3688 /* MOV dst.w, s1.w */
3689 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
3690 struct tgsi_full_dst_register dst_w =
3691 writemask_dst(&move_dst, TGSI_WRITEMASK_W);
3692
3693 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &s1_wwww,
3694 inst->Instruction.Saturate);
3695 }
3696
3697 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src,
3698 FALSE);
3699 free_temp_indexes(emit);
3700
3701 return TRUE;
3702 }
3703
3704
3705
3706 /**
3707 * Emit code for TGSI_OPCODE_ENDPRIM (GS only)
3708 */
3709 static boolean
3710 emit_endprim(struct svga_shader_emitter_v10 *emit,
3711 const struct tgsi_full_instruction *inst)
3712 {
3713 assert(emit->unit == PIPE_SHADER_GEOMETRY);
3714
3715 /* We can't use emit_simple() because the TGSI instruction has one
3716 * operand (vertex stream number) which we must ignore for VGPU10.
3717 */
3718 begin_emit_instruction(emit);
3719 emit_opcode(emit, VGPU10_OPCODE_CUT, FALSE);
3720 end_emit_instruction(emit);
3721 return TRUE;
3722 }
3723
3724
3725 /**
3726 * Emit code for TGSI_OPCODE_EX2 (2^x) instruction.
3727 */
3728 static boolean
3729 emit_ex2(struct svga_shader_emitter_v10 *emit,
3730 const struct tgsi_full_instruction *inst)
3731 {
3732 /* Note that TGSI_OPCODE_EX2 computes only one value from src.x
3733 * while VGPU10 computes four values.
3734 *
3735 * dst = EX2(src):
3736 * dst.xyzw = 2.0 ^ src.x
3737 */
3738
3739 struct tgsi_full_src_register src_xxxx =
3740 swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
3741 TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
3742
3743 /* EXP dst, s0.xxxx */
3744 emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &inst->Dst[0], &src_xxxx,
3745 inst->Instruction.Saturate);
3746
3747 return TRUE;
3748 }
3749
3750
3751 /**
3752 * Emit code for TGSI_OPCODE_EXP instruction.
3753 */
3754 static boolean
3755 emit_exp(struct svga_shader_emitter_v10 *emit,
3756 const struct tgsi_full_instruction *inst)
3757 {
3758 /*
3759 * dst.x = 2 ^ floor(s0.x)
3760 * dst.y = s0.x - floor(s0.x)
3761 * dst.z = 2 ^ s0.x
3762 * dst.w = 1.0
3763 */
3764
3765 struct tgsi_full_src_register src_xxxx =
3766 scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
3767 unsigned tmp = get_temp_index(emit);
3768 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
3769 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
3770
3771 /*
3772 * If dst and src are the same register we need to write to
3773 * a temporary and insert an extra move at the end.
3774 */
3775 unsigned tmp_move = get_temp_index(emit);
3776 struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move);
3777 struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move);
3778
3779 /* only use X component of temp reg */
3780 tmp_dst = writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
3781 tmp_src = scalar_src(&tmp_src, TGSI_SWIZZLE_X);
3782
3783 /* ROUND_NI tmp.x, s0.x */
3784 emit_instruction_op1(emit, VGPU10_OPCODE_ROUND_NI, &tmp_dst,
3785 &src_xxxx, FALSE); /* round to -infinity */
3786
3787 /* EXP dst.x, tmp.x */
3788 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
3789 struct tgsi_full_dst_register dst_x =
3790 writemask_dst(&move_dst, TGSI_WRITEMASK_X);
3791
3792 emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &dst_x, &tmp_src,
3793 inst->Instruction.Saturate);
3794 }
3795
3796 /* ADD dst.y, s0.x, -tmp */
3797 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
3798 struct tgsi_full_dst_register dst_y =
3799 writemask_dst(&move_dst, TGSI_WRITEMASK_Y);
3800 struct tgsi_full_src_register neg_tmp_src = negate_src(&tmp_src);
3801
3802 emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &dst_y, &src_xxxx,
3803 &neg_tmp_src, inst->Instruction.Saturate);
3804 }
3805
3806 /* EXP dst.z, s0.x */
3807 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
3808 struct tgsi_full_dst_register dst_z =
3809 writemask_dst(&move_dst, TGSI_WRITEMASK_Z);
3810
3811 emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &dst_z, &src_xxxx,
3812 inst->Instruction.Saturate);
3813 }
3814
3815 /* MOV dst.w, 1.0 */
3816 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
3817 struct tgsi_full_dst_register dst_w =
3818 writemask_dst(&move_dst, TGSI_WRITEMASK_W);
3819 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
3820
3821 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one,
3822 FALSE);
3823 }
3824
3825 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src,
3826 FALSE);
3827
3828 free_temp_indexes(emit);
3829
3830 return TRUE;
3831 }
3832
3833
3834 /**
3835 * Emit code for TGSI_OPCODE_IF instruction.
3836 */
3837 static boolean
3838 emit_if(struct svga_shader_emitter_v10 *emit,
3839 const struct tgsi_full_instruction *inst)
3840 {
3841 VGPU10OpcodeToken0 opcode0;
3842
3843 /* The src register should be a scalar */
3844 assert(inst->Src[0].Register.SwizzleX == inst->Src[0].Register.SwizzleY &&
3845 inst->Src[0].Register.SwizzleX == inst->Src[0].Register.SwizzleZ &&
3846 inst->Src[0].Register.SwizzleX == inst->Src[0].Register.SwizzleW);
3847
3848 /* The only special thing here is that we need to set the
3849 * VGPU10_INSTRUCTION_TEST_NONZERO flag since we want to test if
3850 * src.x is non-zero.
3851 */
3852 opcode0.value = 0;
3853 opcode0.opcodeType = VGPU10_OPCODE_IF;
3854 opcode0.testBoolean = VGPU10_INSTRUCTION_TEST_NONZERO;
3855
3856 begin_emit_instruction(emit);
3857 emit_dword(emit, opcode0.value);
3858 emit_src_register(emit, &inst->Src[0]);
3859 end_emit_instruction(emit);
3860
3861 return TRUE;
3862 }
3863
3864
3865 /**
3866 * Emit code for TGSI_OPCODE_KILL_IF instruction (kill fragment if any of
3867 * the register components are negative).
3868 */
3869 static boolean
3870 emit_kill_if(struct svga_shader_emitter_v10 *emit,
3871 const struct tgsi_full_instruction *inst)
3872 {
3873 unsigned tmp = get_temp_index(emit);
3874 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
3875 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
3876
3877 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
3878
3879 struct tgsi_full_dst_register tmp_dst_x =
3880 writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
3881 struct tgsi_full_src_register tmp_src_xxxx =
3882 scalar_src(&tmp_src, TGSI_SWIZZLE_X);
3883
3884 /* tmp = src[0] < 0.0 */
3885 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[0],
3886 &zero, FALSE);
3887
3888 if (!same_swizzle_terms(&inst->Src[0])) {
3889 /* If the swizzle is not XXXX, YYYY, ZZZZ or WWWW we need to
3890 * logically OR the swizzle terms. Most uses of KILL_IF only
3891 * test one channel so it's good to avoid these extra steps.
3892 */
3893 struct tgsi_full_src_register tmp_src_yyyy =
3894 scalar_src(&tmp_src, TGSI_SWIZZLE_Y);
3895 struct tgsi_full_src_register tmp_src_zzzz =
3896 scalar_src(&tmp_src, TGSI_SWIZZLE_Z);
3897 struct tgsi_full_src_register tmp_src_wwww =
3898 scalar_src(&tmp_src, TGSI_SWIZZLE_W);
3899
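      /* tmp.x = tmp.x | tmp.y | tmp.z | tmp.w, so any negative source
       * component produces a non-zero result in tmp.x.
       */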
3900 emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx,
3901 &tmp_src_yyyy, FALSE);
3902 emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx,
3903 &tmp_src_zzzz, FALSE);
3904 emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx,
3905 &tmp_src_wwww, FALSE);
3906 }
3907
3908 begin_emit_instruction(emit);
3909 emit_discard_opcode(emit, TRUE); /* discard if tmp.x is non-zero */
3910 emit_src_register(emit, &tmp_src_xxxx);
3911 end_emit_instruction(emit);
3912
3913 free_temp_indexes(emit);
3914
3915 return TRUE;
3916 }
3917
3918
3919 /**
3920 * Emit code for TGSI_OPCODE_KILL instruction (unconditional discard).
3921 */
3922 static boolean
3923 emit_kill(struct svga_shader_emitter_v10 *emit,
3924 const struct tgsi_full_instruction *inst)
3925 {
3926 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
3927
3928 /* DISCARD with a 0.0 source and a test-for-zero condition: always discards */
3929 begin_emit_instruction(emit);
3930 emit_discard_opcode(emit, FALSE);
3931 emit_src_register(emit, &zero);
3932 end_emit_instruction(emit);
3933
3934 return TRUE;
3935 }
3936
3937
3938 /**
3939 * Emit code for TGSI_OPCODE_LG2 instruction.
3940 */
3941 static boolean
3942 emit_lg2(struct svga_shader_emitter_v10 *emit,
3943 const struct tgsi_full_instruction *inst)
3944 {
3945 /* Note that TGSI_OPCODE_LG2 computes only one value from src.x
3946 * while VGPU10 computes four values.
3947 *
3948 * dst = LG2(src):
3949 * dst.xyzw = log2(src.x)
3950 */
3951
3952 struct tgsi_full_src_register src_xxxx =
3953 swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
3954 TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
3955
3956 /* LOG dst, s0.xxxx */
3957 emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &inst->Dst[0], &src_xxxx,
3958 inst->Instruction.Saturate);
3959
3960 return TRUE;
3961 }
3962
3963
3964 /**
3965 * Emit code for TGSI_OPCODE_LIT instruction.
3966 */
3967 static boolean
3968 emit_lit(struct svga_shader_emitter_v10 *emit,
3969 const struct tgsi_full_instruction *inst)
3970 {
3971 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
3972
3973 /*
3974 * If dst and src are the same register we need to write to
3975 * a temporary and insert an extra move at the end.
3976 */
3977 unsigned tmp_move = get_temp_index(emit);
3978 struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move);
3979 struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move);
3980
3981 /*
3982 * dst.x = 1
3983 * dst.y = max(src.x, 0)
3984 * dst.z = (src.x > 0) ? max(src.y, 0)^clamp(src.w, -128, 128) : 0
3985 * dst.w = 1
3986 */
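   /* Example (hypothetical input): LIT(0.5, 0.25, 0.0, 2.0) yields
    * (1.0, 0.5, 0.0625, 1.0) since 0.25^2 = 0.0625.
    */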
3987
3988 /* MOV dst.x, 1.0 */
3989 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
3990 struct tgsi_full_dst_register dst_x =
3991 writemask_dst(&move_dst, TGSI_WRITEMASK_X);
3992 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &one, FALSE);
3993 }
3994
3995 /* MOV dst.w, 1.0 */
3996 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
3997 struct tgsi_full_dst_register dst_w =
3998 writemask_dst(&move_dst, TGSI_WRITEMASK_W);
3999 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one, FALSE);
4000 }
4001
4002 /* MAX dst.y, src.x, 0.0 */
4003 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
4004 struct tgsi_full_dst_register dst_y =
4005 writemask_dst(&move_dst, TGSI_WRITEMASK_Y);
4006 struct tgsi_full_src_register zero =
4007 make_immediate_reg_float(emit, 0.0f);
4008 struct tgsi_full_src_register src_xxxx =
4009 swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
4010 TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
4011
4012 emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &dst_y, &src_xxxx,
4013 &zero, inst->Instruction.Saturate);
4014 }
4015
4016 /*
4017 * tmp1 = clamp(src.w, -128, 128);
4018 * MAX tmp1, src.w, -128
4019 * MIN tmp1, tmp1, 128
4020 *
4021 * tmp2 = max(src.y, 0);
4022 * MAX tmp2, src.y, 0
4023 *
4024 * tmp1 = pow(tmp2, tmp1);
4025 * LOG tmp2, tmp2
4026 * MUL tmp1, tmp2, tmp1
4027 * EXP tmp1, tmp1
4028 *
4029 * tmp1 = (src.w == 0) ? 1 : tmp1;
4030 * EQ tmp2, 0, src.w
4031 * MOVC tmp1, tmp2, 1.0, tmp1
4032 *
4033 * dst.z = (0 < src.x) ? tmp1 : 0;
4034 * LT tmp2, 0, src.x
4035 * MOVC dst.z, tmp2, tmp1, 0.0
4036 */
4037 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
4038 struct tgsi_full_dst_register dst_z =
4039 writemask_dst(&move_dst, TGSI_WRITEMASK_Z);
4040
4041 unsigned tmp1 = get_temp_index(emit);
4042 struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1);
4043 struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1);
4044 unsigned tmp2 = get_temp_index(emit);
4045 struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2);
4046 struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2);
4047
4048 struct tgsi_full_src_register src_xxxx =
4049 scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
4050 struct tgsi_full_src_register src_yyyy =
4051 scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y);
4052 struct tgsi_full_src_register src_wwww =
4053 scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
4054
4055 struct tgsi_full_src_register zero =
4056 make_immediate_reg_float(emit, 0.0f);
4057 struct tgsi_full_src_register lowerbound =
4058 make_immediate_reg_float(emit, -128.0f);
4059 struct tgsi_full_src_register upperbound =
4060 make_immediate_reg_float(emit, 128.0f);
4061
4062 emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &tmp1_dst, &src_wwww,
4063 &lowerbound, FALSE);
4064 emit_instruction_op2(emit, VGPU10_OPCODE_MIN, &tmp1_dst, &tmp1_src,
4065 &upperbound, FALSE);
4066 emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &tmp2_dst, &src_yyyy,
4067 &zero, FALSE);
4068
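      /* pow(tmp2, tmp1) is expanded as exp2(tmp1 * log2(tmp2)); the VGPU10
       * LOG and EXP opcodes are base-2.
       */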
4069 /* POW tmp1, tmp2, tmp1 */
4070 /* LOG tmp2, tmp2 */
4071 emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp2_dst, &tmp2_src,
4072 FALSE);
4073
4074 /* MUL tmp1, tmp2, tmp1 */
4075 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst, &tmp2_src,
4076 &tmp1_src, FALSE);
4077
4078 /* EXP tmp1, tmp1 */
4079 emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &tmp1_dst, &tmp1_src,
4080 FALSE);
4081
4082 /* EQ tmp2, 0, src.w */
4083 emit_instruction_op2(emit, VGPU10_OPCODE_EQ, &tmp2_dst, &zero,
4084 &src_wwww, FALSE);
4085 /* MOVC tmp1, tmp2, 1.0, tmp1 */
4086 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &tmp1_dst,
4087 &tmp2_src, &one, &tmp1_src, FALSE);
4088
4089 /* LT tmp2, 0, src.x */
4090 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp2_dst, &zero,
4091 &src_xxxx, FALSE);
4092 /* MOVC dst.z, tmp2, tmp1, 0.0 */
4093 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &dst_z,
4094 &tmp2_src, &tmp1_src, &zero, FALSE);
4095 }
4096
4097 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src,
4098 FALSE);
4099 free_temp_indexes(emit);
4100
4101 return TRUE;
4102 }
4103
4104
4105 /**
4106 * Emit code for TGSI_OPCODE_LOG instruction.
4107 */
4108 static boolean
4109 emit_log(struct svga_shader_emitter_v10 *emit,
4110 const struct tgsi_full_instruction *inst)
4111 {
4112 /*
4113 * dst.x = floor(lg2(abs(s0.x)))
4114 * dst.y = abs(s0.x) / (2 ^ floor(lg2(abs(s0.x))))
4115 * dst.z = lg2(abs(s0.x))
4116 * dst.w = 1.0
4117 */
4118
4119 struct tgsi_full_src_register src_xxxx =
4120 scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
4121 unsigned tmp = get_temp_index(emit);
4122 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
4123 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
4124 struct tgsi_full_src_register abs_src_xxxx = absolute_src(&src_xxxx);
4125
4126 /* only use X component of temp reg */
4127 tmp_dst = writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
4128 tmp_src = scalar_src(&tmp_src, TGSI_SWIZZLE_X);
4129
4130 /* LOG tmp.x, abs(s0.x) */
4131 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XYZ) {
4132 emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp_dst,
4133 &abs_src_xxxx, FALSE);
4134 }
4135
4136 /* MOV dst.z, tmp.x */
4137 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
4138 struct tgsi_full_dst_register dst_z =
4139 writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Z);
4140
4141 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_z,
4142 &tmp_src, inst->Instruction.Saturate);
4143 }
4144
4145 /* FLR tmp.x, tmp.x */
4146 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) {
4147 emit_instruction_op1(emit, VGPU10_OPCODE_ROUND_NI, &tmp_dst,
4148 &tmp_src, FALSE);
4149 }
4150
4151 /* MOV dst.x, tmp.x */
4152 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
4153 struct tgsi_full_dst_register dst_x =
4154 writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_X);
4155
4156 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &tmp_src,
4157 inst->Instruction.Saturate);
4158 }
4159
4160 /* EXP tmp.x, tmp.x */
4161 /* DIV dst.y, abs(s0.x), tmp.x */
4162 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
4163 struct tgsi_full_dst_register dst_y =
4164 writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Y);
4165
4166 emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &tmp_dst, &tmp_src,
4167 FALSE);
4168 emit_instruction_op2(emit, VGPU10_OPCODE_DIV, &dst_y, &abs_src_xxxx,
4169 &tmp_src, inst->Instruction.Saturate);
4170 }
4171
4172 /* MOV dst.w, 1.0 */
4173 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
4174 struct tgsi_full_dst_register dst_w =
4175 writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_W);
4176 struct tgsi_full_src_register one =
4177 make_immediate_reg_float(emit, 1.0f);
4178
4179 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one, FALSE);
4180 }
4181
4182 free_temp_indexes(emit);
4183
4184 return TRUE;
4185 }
4186
4187
4188 /**
4189 * Emit code for TGSI_OPCODE_LRP instruction.
4190 */
4191 static boolean
4192 emit_lrp(struct svga_shader_emitter_v10 *emit,
4193 const struct tgsi_full_instruction *inst)
4194 {
4195 /* dst = LRP(s0, s1, s2):
4196 * dst = s0 * (s1 - s2) + s2
4197 * Translates into:
4198 * ADD tmp, s1, -s2; tmp = s1 - s2
4199 * MAD dst, s0, tmp, s2; dst = s0 * tmp + s2
4200 */
4201 unsigned tmp = get_temp_index(emit);
4202 struct tgsi_full_src_register src_tmp = make_src_temp_reg(tmp);
4203 struct tgsi_full_dst_register dst_tmp = make_dst_temp_reg(tmp);
4204 struct tgsi_full_src_register neg_src2 = negate_src(&inst->Src[2]);
4205
4206 /* ADD tmp, s1, -s2 */
4207 emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &dst_tmp,
4208 &inst->Src[1], &neg_src2, FALSE);
4209
4210 /* MAD dst, s0, tmp, s2 */
4211 emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &inst->Dst[0],
4212 &inst->Src[0], &src_tmp, &inst->Src[2],
4213 inst->Instruction.Saturate);
4214
4215 free_temp_indexes(emit);
4216
4217 return TRUE;
4218 }
4219
4220
4221 /**
4222 * Emit code for TGSI_OPCODE_POW instruction.
4223 */
4224 static boolean
4225 emit_pow(struct svga_shader_emitter_v10 *emit,
4226 const struct tgsi_full_instruction *inst)
4227 {
4228 /* Note that TGSI_OPCODE_POW computes only one value from src0.x and
4229 * src1.x while VGPU10 computes four values.
4230 *
4231 * dst = POW(src0, src1):
4232 * dst.xyzw = src0.x ^ src1.x
4233 */
4234 unsigned tmp = get_temp_index(emit);
4235 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
4236 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
4237 struct tgsi_full_src_register src0_xxxx =
4238 swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
4239 TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
4240 struct tgsi_full_src_register src1_xxxx =
4241 swizzle_src(&inst->Src[1], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
4242 TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
4243
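   /* pow(s0.x, s1.x) is computed as exp2(s1.x * log2(s0.x)); VGPU10's
    * LOG and EXP opcodes are base-2.
    */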
4244 /* LOG tmp, s0.xxxx */
4245 emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp_dst, &src0_xxxx,
4246 FALSE);
4247
4248 /* MUL tmp, tmp, s1.xxxx */
4249 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_dst, &tmp_src,
4250 &src1_xxxx, FALSE);
4251
4252 /* EXP dst, tmp */
4253 emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &inst->Dst[0],
4254 &tmp_src, inst->Instruction.Saturate);
4255
4256 /* free tmp */
4257 free_temp_indexes(emit);
4258
4259 return TRUE;
4260 }
4261
4262
4263 /**
4264 * Emit code for TGSI_OPCODE_RCP (reciprocal) instruction.
4265 */
4266 static boolean
4267 emit_rcp(struct svga_shader_emitter_v10 *emit,
4268 const struct tgsi_full_instruction *inst)
4269 {
4270 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
4271
4272 unsigned tmp = get_temp_index(emit);
4273 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
4274 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
4275
4276 struct tgsi_full_dst_register tmp_dst_x =
4277 writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
4278 struct tgsi_full_src_register tmp_src_xxxx =
4279 scalar_src(&tmp_src, TGSI_SWIZZLE_X);
4280
4281 /* DIV tmp.x, 1.0, s0 */
4282 emit_instruction_op2(emit, VGPU10_OPCODE_DIV, &tmp_dst_x, &one,
4283 &inst->Src[0], FALSE);
4284
4285 /* MOV dst, tmp.xxxx */
4286 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
4287 &tmp_src_xxxx, inst->Instruction.Saturate);
4288
4289 free_temp_indexes(emit);
4290
4291 return TRUE;
4292 }
4293
4294
4295 /**
4296 * Emit code for TGSI_OPCODE_RSQ instruction.
4297 */
4298 static boolean
4299 emit_rsq(struct svga_shader_emitter_v10 *emit,
4300 const struct tgsi_full_instruction *inst)
4301 {
4302 /* dst = RSQ(src):
4303 * dst.xyzw = 1 / sqrt(src.x)
4304 * Translates into:
4305 * RSQ tmp, src.x
4306 * MOV dst, tmp.xxxx
4307 */
4308
4309 unsigned tmp = get_temp_index(emit);
4310 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
4311 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
4312
4313 struct tgsi_full_dst_register tmp_dst_x =
4314 writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
4315 struct tgsi_full_src_register tmp_src_xxxx =
4316 scalar_src(&tmp_src, TGSI_SWIZZLE_X);
4317
4318 /* RSQ tmp, src.x */
4319 emit_instruction_op1(emit, VGPU10_OPCODE_RSQ, &tmp_dst_x,
4320 &inst->Src[0], FALSE);
4321
4322 /* MOV dst, tmp.xxxx */
4323 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
4324 &tmp_src_xxxx, inst->Instruction.Saturate);
4325
4326 /* free tmp */
4327 free_temp_indexes(emit);
4328
4329 return TRUE;
4330 }
4331
4332
4333 /**
4334 * Emit code for TGSI_OPCODE_SCS instruction.
4335 */
4336 static boolean
4337 emit_scs(struct svga_shader_emitter_v10 *emit,
4338 const struct tgsi_full_instruction *inst)
4339 {
4340 /* dst.x = cos(src.x)
4341 * dst.y = sin(src.x)
4342 * dst.z = 0.0
4343 * dst.w = 1.0
4344 */
4345 struct tgsi_full_dst_register dst_x =
4346 writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_X);
4347 struct tgsi_full_dst_register dst_y =
4348 writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Y);
4349 struct tgsi_full_dst_register dst_zw =
4350 writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_ZW);
4351
4352 struct tgsi_full_src_register zero_one =
4353 make_immediate_reg_float4(emit, 0.0f, 0.0f, 0.0f, 1.0f);
4354
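   /* VGPU10 SINCOS writes sine to its first destination and cosine to the
    * second, so dst.y receives sin(src.x) and dst.x receives cos(src.x),
    * matching the SCS definition above.
    */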
4355 begin_emit_instruction(emit);
4356 emit_opcode(emit, VGPU10_OPCODE_SINCOS, inst->Instruction.Saturate);
4357 emit_dst_register(emit, &dst_y);
4358 emit_dst_register(emit, &dst_x);
4359 emit_src_register(emit, &inst->Src[0]);
4360 end_emit_instruction(emit);
4361
4362 emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
4363 &dst_zw, &zero_one, inst->Instruction.Saturate);
4364
4365 return TRUE;
4366 }
4367
4368
4369 /**
4370 * Emit code for TGSI_OPCODE_SEQ (Set Equal) instruction.
4371 */
4372 static boolean
4373 emit_seq(struct svga_shader_emitter_v10 *emit,
4374 const struct tgsi_full_instruction *inst)
4375 {
4376 /* dst = SEQ(s0, s1):
4377 * dst = s0 == s1 ? 1.0 : 0.0 (per component)
4378 * Translates into:
4379 * EQ tmp, s0, s1; tmp = s0 == s1 ? 0xffffffff : 0 (per comp)
4380 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component)
4381 */
4382 unsigned tmp = get_temp_index(emit);
4383 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
4384 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
4385 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
4386 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
4387
4388 /* EQ tmp, s0, s1 */
4389 emit_instruction_op2(emit, VGPU10_OPCODE_EQ, &tmp_dst, &inst->Src[0],
4390 &inst->Src[1], FALSE);
4391
4392 /* MOVC dst, tmp, one, zero */
4393 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
4394 &one, &zero, FALSE);
4395
4396 free_temp_indexes(emit);
4397
4398 return TRUE;
4399 }
4400
4401
4402 /**
4403 * Emit code for TGSI_OPCODE_SGE (Set Greater than or Equal) instruction.
4404 */
4405 static boolean
4406 emit_sge(struct svga_shader_emitter_v10 *emit,
4407 const struct tgsi_full_instruction *inst)
4408 {
4409 /* dst = SGE(s0, s1):
4410 * dst = s0 >= s1 ? 1.0 : 0.0 (per component)
4411 * Translates into:
4412 * GE tmp, s0, s1; tmp = s0 >= s1 ? 0xffffffff : 0 (per comp)
4413 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component)
4414 */
4415 unsigned tmp = get_temp_index(emit);
4416 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
4417 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
4418 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
4419 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
4420
4421 /* GE tmp, s0, s1 */
4422 emit_instruction_op2(emit, VGPU10_OPCODE_GE, &tmp_dst, &inst->Src[0],
4423 &inst->Src[1], FALSE);
4424
4425 /* MOVC dst, tmp, one, zero */
4426 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
4427 &one, &zero, FALSE);
4428
4429 free_temp_indexes(emit);
4430
4431 return TRUE;
4432 }
4433
4434
4435 /**
4436 * Emit code for TGSI_OPCODE_SGT (Set Greater than) instruction.
4437 */
4438 static boolean
4439 emit_sgt(struct svga_shader_emitter_v10 *emit,
4440 const struct tgsi_full_instruction *inst)
4441 {
4442 /* dst = SGT(s0, s1):
4443 * dst = s0 > s1 ? 1.0 : 0.0 (per component)
4444 * Translates into:
4445 * LT tmp, s1, s0; tmp = s1 < s0 ? 0xffffffff : 0 (per comp)
4446 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component)
4447 */
4448 unsigned tmp = get_temp_index(emit);
4449 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
4450 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
4451 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
4452 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
4453
4454 /* LT tmp, s1, s0 */
4455 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[1],
4456 &inst->Src[0], FALSE);
4457
4458 /* MOVC dst, tmp, one, zero */
4459 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
4460 &one, &zero, FALSE);
4461
4462 free_temp_indexes(emit);
4463
4464 return TRUE;
4465 }
4466
4467
4468 /**
4469 * Emit code for TGSI_OPCODE_SIN and TGSI_OPCODE_COS instructions.
4470 */
4471 static boolean
4472 emit_sincos(struct svga_shader_emitter_v10 *emit,
4473 const struct tgsi_full_instruction *inst)
4474 {
4475 unsigned tmp = get_temp_index(emit);
4476 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
4477 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
4478
4479 struct tgsi_full_src_register tmp_src_xxxx =
4480 scalar_src(&tmp_src, TGSI_SWIZZLE_X);
4481 struct tgsi_full_dst_register tmp_dst_x =
4482 writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
4483
4484 begin_emit_instruction(emit);
4485 emit_opcode(emit, VGPU10_OPCODE_SINCOS, FALSE);
4486
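   /* SINCOS has two destinations: the first receives the sine and the
    * second the cosine, so pick which slot gets tmp.x based on whether
    * this is TGSI SIN or COS.
    */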
4487 if (inst->Instruction.Opcode == TGSI_OPCODE_SIN)
4488 {
4489 emit_dst_register(emit, &tmp_dst_x); /* first destination register */
4490 emit_null_dst_register(emit); /* second destination register */
4491 }
4492 else {
4493 emit_null_dst_register(emit);
4494 emit_dst_register(emit, &tmp_dst_x);
4495 }
4496
4497 emit_src_register(emit, &inst->Src[0]);
4498 end_emit_instruction(emit);
4499
4500 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
4501 &tmp_src_xxxx, inst->Instruction.Saturate);
4502
4503 free_temp_indexes(emit);
4504
4505 return TRUE;
4506 }
4507
4508
4509 /**
4510 * Emit code for TGSI_OPCODE_SLE (Set Less than or Equal) instruction.
4511 */
4512 static boolean
4513 emit_sle(struct svga_shader_emitter_v10 *emit,
4514 const struct tgsi_full_instruction *inst)
4515 {
4516 /* dst = SLE(s0, s1):
4517 * dst = s0 <= s1 ? 1.0 : 0.0 (per component)
4518 * Translates into:
4519 * GE tmp, s1, s0; tmp = s1 >= s0 ? 0xffffffff : 0 (per comp)
4520 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component)
4521 */
4522 unsigned tmp = get_temp_index(emit);
4523 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
4524 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
4525 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
4526 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
4527
4528 /* GE tmp, s1, s0 */
4529 emit_instruction_op2(emit, VGPU10_OPCODE_GE, &tmp_dst, &inst->Src[1],
4530 &inst->Src[0], FALSE);
4531
4532 /* MOVC dst, tmp, one, zero */
4533 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
4534 &one, &zero, FALSE);
4535
4536 free_temp_indexes(emit);
4537
4538 return TRUE;
4539 }
4540
4541
4542 /**
4543 * Emit code for TGSI_OPCODE_SLT (Set Less than) instruction.
4544 */
4545 static boolean
4546 emit_slt(struct svga_shader_emitter_v10 *emit,
4547 const struct tgsi_full_instruction *inst)
4548 {
4549 /* dst = SLT(s0, s1):
4550 * dst = s0 < s1 ? 1.0 : 0.0 (per component)
4551 * Translates into:
4552 * LT tmp, s0, s1; tmp = s0 < s1 ? 0xffffffff : 0 (per comp)
4553 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component)
4554 */
4555 unsigned tmp = get_temp_index(emit);
4556 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
4557 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
4558 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
4559 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
4560
4561 /* LT tmp, s0, s1 */
4562 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[0],
4563 &inst->Src[1], FALSE);
4564
4565 /* MOVC dst, tmp, one, zero */
4566 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
4567 &one, &zero, FALSE);
4568
4569 free_temp_indexes(emit);
4570
4571 return TRUE;
4572 }
4573
4574
4575 /**
4576 * Emit code for TGSI_OPCODE_SNE (Set Not Equal) instruction.
4577 */
4578 static boolean
4579 emit_sne(struct svga_shader_emitter_v10 *emit,
4580 const struct tgsi_full_instruction *inst)
4581 {
4582 /* dst = SNE(s0, s1):
4583 * dst = s0 != s1 ? 1.0 : 0.0 (per component)
4584 * Translates into:
4585 * NE tmp, s0, s1; tmp = s0 != s1 ? 0xffffffff : 0 (per comp)
4586 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component)
4587 */
4588 unsigned tmp = get_temp_index(emit);
4589 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
4590 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
4591 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
4592 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
4593
4594 /* NE tmp, s0, s1 */
4595 emit_instruction_op2(emit, VGPU10_OPCODE_NE, &tmp_dst, &inst->Src[0],
4596 &inst->Src[1], FALSE);
4597
4598 /* MOVC dst, tmp, one, zero */
4599 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
4600 &one, &zero, FALSE);
4601
4602 free_temp_indexes(emit);
4603
4604 return TRUE;
4605 }
4606
4607
4608 /**
4609 * Emit code for TGSI_OPCODE_SSG (Set Sign) instruction.
4610 */
4611 static boolean
4612 emit_ssg(struct svga_shader_emitter_v10 *emit,
4613 const struct tgsi_full_instruction *inst)
4614 {
4615 /* dst.x = (src.x > 0.0) ? 1.0 : (src.x < 0.0) ? -1.0 : 0.0
4616 * dst.y = (src.y > 0.0) ? 1.0 : (src.y < 0.0) ? -1.0 : 0.0
4617 * dst.z = (src.z > 0.0) ? 1.0 : (src.z < 0.0) ? -1.0 : 0.0
4618 * dst.w = (src.w > 0.0) ? 1.0 : (src.w < 0.0) ? -1.0 : 0.0
4619 * Translates into:
4620 * LT tmp1, src, zero; tmp1 = src < zero ? 0xffffffff : 0 (per comp)
4621 * MOVC tmp2, tmp1, -1.0, 0.0; tmp2 = tmp1 ? -1.0 : 0.0 (per component)
4622 * LT tmp1, zero, src; tmp1 = zero < src ? 0xffffffff : 0 (per comp)
4623 * MOVC dst, tmp1, 1.0, tmp2; dst = tmp1 ? 1.0 : tmp2 (per component)
4624 */
4625 struct tgsi_full_src_register zero =
4626 make_immediate_reg_float(emit, 0.0f);
4627 struct tgsi_full_src_register one =
4628 make_immediate_reg_float(emit, 1.0f);
4629 struct tgsi_full_src_register neg_one =
4630 make_immediate_reg_float(emit, -1.0f);
4631
4632 unsigned tmp1 = get_temp_index(emit);
4633 struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1);
4634 struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1);
4635
4636 unsigned tmp2 = get_temp_index(emit);
4637 struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2);
4638 struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2);
4639
4640 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp1_dst, &inst->Src[0],
4641 &zero, FALSE);
4642 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &tmp2_dst, &tmp1_src,
4643 &neg_one, &zero, FALSE);
4644 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp1_dst, &zero,
4645 &inst->Src[0], FALSE);
4646 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp1_src,
4647 &one, &tmp2_src, FALSE);
4648
4649 free_temp_indexes(emit);
4650
4651 return TRUE;
4652 }
4653
4654
4655 /**
4656 * Emit code for TGSI_OPCODE_ISSG (Integer Set Sign) instruction.
4657 */
4658 static boolean
4659 emit_issg(struct svga_shader_emitter_v10 *emit,
4660 const struct tgsi_full_instruction *inst)
4661 {
4662 /* dst.x = (src.x > 0) ? 1 : (src.x < 0) ? -1 : 0
4663 * dst.y = (src.y > 0) ? 1 : (src.y < 0) ? -1 : 0
4664 * dst.z = (src.z > 0) ? 1 : (src.z < 0) ? -1 : 0
4665 * dst.w = (src.w > 0) ? 1 : (src.w < 0) ? -1 : 0
4666 * Translates into:
4667 * ILT tmp1, src, 0 tmp1 = src < 0 ? -1 : 0 (per component)
4668 * ILT tmp2, 0, src tmp2 = 0 < src ? -1 : 0 (per component)
4669 * IADD dst, tmp1, neg(tmp2) dst = tmp1 - tmp2 (per component)
4670 */
4671 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
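   /* Note: the float 0.0 immediate has the same all-zero bit pattern as
    * integer 0, so it is safe to use as an operand of the integer ILT below.
    */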
4672
4673 unsigned tmp1 = get_temp_index(emit);
4674 struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1);
4675 struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1);
4676
4677 unsigned tmp2 = get_temp_index(emit);
4678 struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2);
4679 struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2);
4680
4681 struct tgsi_full_src_register neg_tmp2 = negate_src(&tmp2_src);
4682
4683 emit_instruction_op2(emit, VGPU10_OPCODE_ILT, &tmp1_dst,
4684 &inst->Src[0], &zero, FALSE);
4685 emit_instruction_op2(emit, VGPU10_OPCODE_ILT, &tmp2_dst,
4686 &zero, &inst->Src[0], FALSE);
4687 emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &inst->Dst[0],
4688 &tmp1_src, &neg_tmp2, FALSE);
4689
4690 free_temp_indexes(emit);
4691
4692 return TRUE;
4693 }
4694
4695
4696 /**
4697 * Emit code for TGSI_OPCODE_SUB instruction.
4698 */
4699 static boolean
4700 emit_sub(struct svga_shader_emitter_v10 *emit,
4701 const struct tgsi_full_instruction *inst)
4702 {
4703 /* dst = SUB(s0, s1):
4704 * dst = s0 - s1
4705 * Translates into:
4706 * ADD dst, s0, neg(s1)
4707 */
4708 struct tgsi_full_src_register neg_src1 = negate_src(&inst->Src[1]);
4709
4710 /* ADD dst, s0, neg(s1) */
4711 emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &inst->Dst[0],
4712 &inst->Src[0], &neg_src1,
4713 inst->Instruction.Saturate);
4714
4715 return TRUE;
4716 }
4717
4718
4719 /**
4720 * Emit a comparison instruction. The dest register will get
4721 * 0 or ~0 values depending on the outcome of comparing src0 to src1.
4722 */
4723 static void
4724 emit_comparison(struct svga_shader_emitter_v10 *emit,
4725 SVGA3dCmpFunc func,
4726 const struct tgsi_full_dst_register *dst,
4727 const struct tgsi_full_src_register *src0,
4728 const struct tgsi_full_src_register *src1)
4729 {
4730 struct tgsi_full_src_register immediate;
4731 VGPU10OpcodeToken0 opcode0;
4732 boolean swapSrc = FALSE;
4733
4734 /* Sanity checks for svga vs. gallium enums */
4735 STATIC_ASSERT(SVGA3D_CMP_LESS == (PIPE_FUNC_LESS + 1));
4736 STATIC_ASSERT(SVGA3D_CMP_GREATEREQUAL == (PIPE_FUNC_GEQUAL + 1));
4737
4738 opcode0.value = 0;
4739
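   /* VGPU10 has no LE or GT comparison opcodes, so LESSEQUAL and GREATER
    * are expressed below with GE/LT and swapped source operands.
    */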
4740 switch (func) {
4741 case SVGA3D_CMP_NEVER:
4742 immediate = make_immediate_reg_int(emit, 0);
4743 /* MOV dst, {0} */
4744 begin_emit_instruction(emit);
4745 emit_dword(emit, VGPU10_OPCODE_MOV);
4746 emit_dst_register(emit, dst);
4747 emit_src_register(emit, &immediate);
4748 end_emit_instruction(emit);
4749 return;
4750 case SVGA3D_CMP_ALWAYS:
4751 immediate = make_immediate_reg_int(emit, -1);
4752 /* MOV dst, {-1} */
4753 begin_emit_instruction(emit);
4754 emit_dword(emit, VGPU10_OPCODE_MOV);
4755 emit_dst_register(emit, dst);
4756 emit_src_register(emit, &immediate);
4757 end_emit_instruction(emit);
4758 return;
4759 case SVGA3D_CMP_LESS:
4760 opcode0.opcodeType = VGPU10_OPCODE_LT;
4761 break;
4762 case SVGA3D_CMP_EQUAL:
4763 opcode0.opcodeType = VGPU10_OPCODE_EQ;
4764 break;
4765 case SVGA3D_CMP_LESSEQUAL:
4766 opcode0.opcodeType = VGPU10_OPCODE_GE;
4767 swapSrc = TRUE;
4768 break;
4769 case SVGA3D_CMP_GREATER:
4770 opcode0.opcodeType = VGPU10_OPCODE_LT;
4771 swapSrc = TRUE;
4772 break;
4773 case SVGA3D_CMP_NOTEQUAL:
4774 opcode0.opcodeType = VGPU10_OPCODE_NE;
4775 break;
4776 case SVGA3D_CMP_GREATEREQUAL:
4777 opcode0.opcodeType = VGPU10_OPCODE_GE;
4778 break;
4779 default:
4780 assert(!"Unexpected comparison mode");
4781 opcode0.opcodeType = VGPU10_OPCODE_EQ;
4782 }
4783
4784 begin_emit_instruction(emit);
4785 emit_dword(emit, opcode0.value);
4786 emit_dst_register(emit, dst);
4787 if (swapSrc) {
4788 emit_src_register(emit, src1);
4789 emit_src_register(emit, src0);
4790 }
4791 else {
4792 emit_src_register(emit, src0);
4793 emit_src_register(emit, src1);
4794 }
4795 end_emit_instruction(emit);
4796 }
4797
4798
4799 /**
4800 * Get texel/address offsets for a texture instruction.
4801 */
4802 static void
4803 get_texel_offsets(const struct svga_shader_emitter_v10 *emit,
4804 const struct tgsi_full_instruction *inst, int offsets[3])
4805 {
4806 if (inst->Texture.NumOffsets == 1) {
4807 /* According to the OpenGL Shading Language spec the offsets can only
4808 * come from a previously-declared immediate/literal.
4809 */
4810 const struct tgsi_texture_offset *off = inst->TexOffsets;
4811 const unsigned index = off[0].Index;
4812 const unsigned swizzleX = off[0].SwizzleX;
4813 const unsigned swizzleY = off[0].SwizzleY;
4814 const unsigned swizzleZ = off[0].SwizzleZ;
4815 const union tgsi_immediate_data *imm = emit->immediates[index];
4816
4817 assert(inst->TexOffsets[0].File == TGSI_FILE_IMMEDIATE);
4818
4819 offsets[0] = imm[swizzleX].Int;
4820 offsets[1] = imm[swizzleY].Int;
4821 offsets[2] = imm[swizzleZ].Int;
4822 }
4823 else {
4824 offsets[0] = offsets[1] = offsets[2] = 0;
4825 }
4826 }
4827
4828
4829 /**
4830 * Set up the coordinate register for texture sampling.
4831 * When we're sampling from a RECT texture we have to scale the
4832 * unnormalized coordinate to a normalized coordinate.
4833 * We do that by multiplying the coordinate by an "extra" constant.
4834 * An alternative would be to use the RESINFO instruction to query the
4835 * texture's size.
4836 */
4837 static struct tgsi_full_src_register
4838 setup_texcoord(struct svga_shader_emitter_v10 *emit,
4839 unsigned unit,
4840 const struct tgsi_full_src_register *coord)
4841 {
4842 if (emit->key.tex[unit].unnormalized) {
4843 unsigned scale_index = emit->texcoord_scale_index[unit];
4844 unsigned tmp = get_temp_index(emit);
4845 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
4846 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
4847 struct tgsi_full_src_register scale_src = make_src_const_reg(scale_index);
4848
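      /* The "extra" constant at texcoord_scale_index is assumed to hold the
       * reciprocal of the texture dimensions, so the multiply below maps the
       * unnormalized RECT coordinates into [0, 1].
       */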
4849 /* MUL tmp, coord, const[] */
4850 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_dst,
4851 coord, &scale_src, FALSE);
4852 return tmp_src;
4853 }
4854 else {
4855 /* use texcoord as-is */
4856 return *coord;
4857 }
4858 }
4859
4860
4861 /**
4862 * For SAMPLE_C instructions, emit the extra src register which indicates
4863 * the reference/comparison value.
4864 */
4865 static void
4866 emit_tex_compare_refcoord(struct svga_shader_emitter_v10 *emit,
4867 unsigned target,
4868 const struct tgsi_full_src_register *coord)
4869 {
4870 struct tgsi_full_src_register coord_src_ref;
4871 unsigned component;
4872
4873 assert(tgsi_is_shadow_target(target));
4874
4875 assert(target != TGSI_TEXTURE_SHADOWCUBE_ARRAY); /* XXX not implemented */
4876 if (target == TGSI_TEXTURE_SHADOW2D_ARRAY ||
4877 target == TGSI_TEXTURE_SHADOWCUBE)
4878 component = TGSI_SWIZZLE_W;
4879 else
4880 component = TGSI_SWIZZLE_Z;
4881
4882 coord_src_ref = scalar_src(coord, component);
4883
4884 emit_src_register(emit, &coord_src_ref);
4885 }
4886
4887
4888 /**
4889 * Info for implementing texture swizzles.
4890 * The begin_tex_swizzle(), get_tex_swizzle_dst() and end_tex_swizzle()
4891 * functions use this to encapsulate the extra steps needed to perform
4892 * a texture swizzle, or shadow/depth comparisons.
4893 * The shadow/depth comparison is only done here for the cases where
4894 * there's no VGPU10 opcode (like texture bias lookup w/ shadow compare).
4895 */
4896 struct tex_swizzle_info
4897 {
4898 boolean swizzled;
4899 boolean shadow_compare;
4900 unsigned unit;
4901 unsigned texture_target; /**< TGSI_TEXTURE_x */
4902 struct tgsi_full_src_register tmp_src;
4903 struct tgsi_full_dst_register tmp_dst;
4904 const struct tgsi_full_dst_register *inst_dst;
4905 const struct tgsi_full_src_register *coord_src;
4906 };
4907
4908
4909 /**
4910 * Do setup for handling texture swizzles or shadow compares.
4911 * \param unit the texture unit
4912 * \param inst the TGSI texture instruction
4913 * \param shadow_compare do shadow/depth comparison?
4914 * \param swz returns the swizzle info
4915 */
4916 static void
4917 begin_tex_swizzle(struct svga_shader_emitter_v10 *emit,
4918 unsigned unit,
4919 const struct tgsi_full_instruction *inst,
4920 boolean shadow_compare,
4921 struct tex_swizzle_info *swz)
4922 {
4923 swz->swizzled = (emit->key.tex[unit].swizzle_r != TGSI_SWIZZLE_X ||
4924 emit->key.tex[unit].swizzle_g != TGSI_SWIZZLE_Y ||
4925 emit->key.tex[unit].swizzle_b != TGSI_SWIZZLE_Z ||
4926 emit->key.tex[unit].swizzle_a != TGSI_SWIZZLE_W);
4927
4928 swz->shadow_compare = shadow_compare;
4929 swz->texture_target = inst->Texture.Texture;
4930
4931 if (swz->swizzled || shadow_compare) {
4932 /* Allocate temp register for the result of the SAMPLE instruction
4933 * and the source of the MOV/compare/swizzle instructions.
4934 */
4935 unsigned tmp = get_temp_index(emit);
4936 swz->tmp_src = make_src_temp_reg(tmp);
4937 swz->tmp_dst = make_dst_temp_reg(tmp);
4938
4939 swz->unit = unit;
4940 }
4941 swz->inst_dst = &inst->Dst[0];
4942 swz->coord_src = &inst->Src[0];
4943 }
4944
4945
4946 /**
4947 * Returns the register to put the SAMPLE instruction results into.
4948 * This will either be the original instruction dst reg (if no swizzle
4949 * and no shadow comparison) or a temporary reg otherwise.
4950 */
4951 static const struct tgsi_full_dst_register *
4952 get_tex_swizzle_dst(const struct tex_swizzle_info *swz)
4953 {
4954 return (swz->swizzled || swz->shadow_compare)
4955 ? &swz->tmp_dst : swz->inst_dst;
4956 }
4957
4958
4959 /**
4960 * This emits the MOV instruction that actually implements a texture swizzle
4961 * and/or shadow comparison.
4962 */
4963 static void
4964 end_tex_swizzle(struct svga_shader_emitter_v10 *emit,
4965 const struct tex_swizzle_info *swz)
4966 {
4967 if (swz->shadow_compare) {
4968 /* Emit extra instructions to compare the fetched texel value against
4969 * a texture coordinate component. The result of the comparison
4970 * is 0.0 or 1.0.
4971 */
4972 struct tgsi_full_src_register coord_src;
4973 struct tgsi_full_src_register texel_src =
4974 scalar_src(&swz->tmp_src, TGSI_SWIZZLE_X);
4975 struct tgsi_full_src_register one =
4976 make_immediate_reg_float(emit, 1.0f);
4977 /* convert gallium comparison func to SVGA comparison func */
4978 SVGA3dCmpFunc compare_func = emit->key.tex[swz->unit].compare_func + 1;
4979
4980 assert(emit->unit == PIPE_SHADER_FRAGMENT);
4981
4982 switch (swz->texture_target) {
4983 case TGSI_TEXTURE_SHADOW2D:
4984 case TGSI_TEXTURE_SHADOWRECT:
4985 case TGSI_TEXTURE_SHADOW1D_ARRAY:
4986 coord_src = scalar_src(swz->coord_src, TGSI_SWIZZLE_Z);
4987 break;
4988 case TGSI_TEXTURE_SHADOW1D:
4989 coord_src = scalar_src(swz->coord_src, TGSI_SWIZZLE_Y);
4990 break;
4991 case TGSI_TEXTURE_SHADOWCUBE:
4992 case TGSI_TEXTURE_SHADOW2D_ARRAY:
4993 coord_src = scalar_src(swz->coord_src, TGSI_SWIZZLE_W);
4994 break;
4995 default:
4996 assert(!"Unexpected texture target in end_tex_swizzle()");
4997 coord_src = scalar_src(swz->coord_src, TGSI_SWIZZLE_Z);
4998 }
4999
5000 /* COMPARE tmp, coord, texel */
5001 /* XXX it would seem that the texel and coord arguments should
5002 * be transposed here, but piglit tests indicate otherwise.
5003 */
5004 emit_comparison(emit, compare_func,
5005 &swz->tmp_dst, &texel_src, &coord_src);
5006
5007 /* AND dest, tmp, {1.0} */
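      /* The comparison above yields 0 or 0xffffffff per component; ANDing
       * with the bit pattern of 1.0f converts that mask to 0.0 or 1.0.
       */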
5008 begin_emit_instruction(emit);
5009 emit_opcode(emit, VGPU10_OPCODE_AND, FALSE);
5010 if (swz->swizzled) {
5011 emit_dst_register(emit, &swz->tmp_dst);
5012 }
5013 else {
5014 emit_dst_register(emit, swz->inst_dst);
5015 }
5016 emit_src_register(emit, &swz->tmp_src);
5017 emit_src_register(emit, &one);
5018 end_emit_instruction(emit);
5019 }
5020
5021 if (swz->swizzled) {
5022 unsigned swz_r = emit->key.tex[swz->unit].swizzle_r;
5023 unsigned swz_g = emit->key.tex[swz->unit].swizzle_g;
5024 unsigned swz_b = emit->key.tex[swz->unit].swizzle_b;
5025 unsigned swz_a = emit->key.tex[swz->unit].swizzle_a;
5026 unsigned writemask_0 = 0, writemask_1 = 0;
5027 boolean int_tex = is_integer_type(emit->key.tex[swz->unit].return_type);
5028
5029 /* Swizzle w/out zero/one terms */
5030 struct tgsi_full_src_register src_swizzled =
5031 swizzle_src(&swz->tmp_src,
5032 swz_r < PIPE_SWIZZLE_ZERO ? swz_r : PIPE_SWIZZLE_RED,
5033 swz_g < PIPE_SWIZZLE_ZERO ? swz_g : PIPE_SWIZZLE_GREEN,
5034 swz_b < PIPE_SWIZZLE_ZERO ? swz_b : PIPE_SWIZZLE_BLUE,
5035 swz_a < PIPE_SWIZZLE_ZERO ? swz_a : PIPE_SWIZZLE_ALPHA);
5036
5037 /* MOV dst, color(tmp).<swizzle> */
5038 emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
5039 swz->inst_dst, &src_swizzled, FALSE);
5040
5041 /* handle swizzle zero terms */
5042 writemask_0 = (((swz_r == PIPE_SWIZZLE_ZERO) << 0) |
5043 ((swz_g == PIPE_SWIZZLE_ZERO) << 1) |
5044 ((swz_b == PIPE_SWIZZLE_ZERO) << 2) |
5045 ((swz_a == PIPE_SWIZZLE_ZERO) << 3));
5046
5047 if (writemask_0) {
5048 struct tgsi_full_src_register zero = int_tex ?
5049 make_immediate_reg_int(emit, 0) :
5050 make_immediate_reg_float(emit, 0.0f);
5051 struct tgsi_full_dst_register dst =
5052 writemask_dst(swz->inst_dst, writemask_0);
5053
5054 /* MOV dst.writemask_0, {0,0,0,0} */
5055 emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
5056 &dst, &zero, FALSE);
5057 }
5058
5059 /* handle swizzle one terms */
5060 writemask_1 = (((swz_r == PIPE_SWIZZLE_ONE) << 0) |
5061 ((swz_g == PIPE_SWIZZLE_ONE) << 1) |
5062 ((swz_b == PIPE_SWIZZLE_ONE) << 2) |
5063 ((swz_a == PIPE_SWIZZLE_ONE) << 3));
5064
5065 if (writemask_1) {
5066 struct tgsi_full_src_register one = int_tex ?
5067 make_immediate_reg_int(emit, 1) :
5068 make_immediate_reg_float(emit, 1.0f);
5069 struct tgsi_full_dst_register dst =
5070 writemask_dst(swz->inst_dst, writemask_1);
5071
5072 /* MOV dst.writemask_1, {1,1,1,1} */
5073 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &one, FALSE);
5074 }
5075 }
5076 }
5077
5078
5079 /**
5080 * Emit code for TGSI_OPCODE_SAMPLE instruction.
5081 */
5082 static boolean
5083 emit_sample(struct svga_shader_emitter_v10 *emit,
5084 const struct tgsi_full_instruction *inst)
5085 {
5086 const unsigned resource_unit = inst->Src[1].Register.Index;
5087 const unsigned sampler_unit = inst->Src[2].Register.Index;
5088 struct tgsi_full_src_register coord;
5089 int offsets[3];
5090 struct tex_swizzle_info swz_info;
5091
5092 begin_tex_swizzle(emit, sampler_unit, inst, FALSE, &swz_info);
5093
5094 get_texel_offsets(emit, inst, offsets);
5095
5096 coord = setup_texcoord(emit, resource_unit, &inst->Src[0]);
5097
5098 /* SAMPLE dst, coord(s0), resource, sampler */
5099 begin_emit_instruction(emit);
5100
5101 emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE,
5102 inst->Instruction.Saturate, offsets);
5103 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
5104 emit_src_register(emit, &coord);
5105 emit_resource_register(emit, resource_unit);
5106 emit_sampler_register(emit, sampler_unit);
5107 end_emit_instruction(emit);
5108
5109 end_tex_swizzle(emit, &swz_info);
5110
5111 free_temp_indexes(emit);
5112
5113 return TRUE;
5114 }
5115
5116
5117 /**
5118 * Check if a texture instruction is valid.
5119 * An example of an invalid texture instruction is doing shadow comparison
5120 * with an integer-valued texture.
5121 * If we detect an invalid texture instruction, we replace it with:
5122 * MOV dst, {1,1,1,1};
5123 * \return TRUE if valid, FALSE if invalid.
5124 */
5125 static boolean
5126 is_valid_tex_instruction(struct svga_shader_emitter_v10 *emit,
5127 const struct tgsi_full_instruction *inst)
5128 {
5129 const unsigned unit = inst->Src[1].Register.Index;
5130 const unsigned target = inst->Texture.Texture;
5131 boolean valid = TRUE;
5132
5133 if (tgsi_is_shadow_target(target) &&
5134 is_integer_type(emit->key.tex[unit].return_type)) {
5135 debug_printf("Invalid SAMPLE_C with an integer texture!\n");
5136 valid = FALSE;
5137 }
5138 /* XXX might check for other conditions in the future here */
5139
5140 if (!valid) {
5141 /* emit a MOV dst, {1,1,1,1} instruction. */
5142 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
5143 begin_emit_instruction(emit);
5144 emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE);
5145 emit_dst_register(emit, &inst->Dst[0]);
5146 emit_src_register(emit, &one);
5147 end_emit_instruction(emit);
5148 }
5149
5150 return valid;
5151 }
5152
5153
5154 /**
5155 * Emit code for TGSI_OPCODE_TEX (simple texture lookup)
5156 */
5157 static boolean
5158 emit_tex(struct svga_shader_emitter_v10 *emit,
5159 const struct tgsi_full_instruction *inst)
5160 {
5161 const uint unit = inst->Src[1].Register.Index;
5162 unsigned target = inst->Texture.Texture;
5163 unsigned opcode;
5164 struct tgsi_full_src_register coord;
5165 int offsets[3];
5166 struct tex_swizzle_info swz_info;
5167
5168 /* check that the sampler returns a float */
5169 if (!is_valid_tex_instruction(emit, inst))
5170 return TRUE;
5171
5172 begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info);
5173
5174 get_texel_offsets(emit, inst, offsets);
5175
5176 coord = setup_texcoord(emit, unit, &inst->Src[0]);
5177
5178 /* SAMPLE dst, coord(s0), resource, sampler */
5179 begin_emit_instruction(emit);
5180
5181 if (tgsi_is_shadow_target(target))
5182 opcode = VGPU10_OPCODE_SAMPLE_C;
5183 else
5184 opcode = VGPU10_OPCODE_SAMPLE;
5185
5186 emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets);
5187 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
5188 emit_src_register(emit, &coord);
5189 emit_resource_register(emit, unit);
5190 emit_sampler_register(emit, unit);
5191 if (opcode == VGPU10_OPCODE_SAMPLE_C) {
5192 emit_tex_compare_refcoord(emit, target, &coord);
5193 }
5194 end_emit_instruction(emit);
5195
5196 end_tex_swizzle(emit, &swz_info);
5197
5198 free_temp_indexes(emit);
5199
5200 return TRUE;
5201 }
5202
5203
5204 /**
5205 * Emit code for TGSI_OPCODE_TXP (projective texture)
5206 */
5207 static boolean
5208 emit_txp(struct svga_shader_emitter_v10 *emit,
5209 const struct tgsi_full_instruction *inst)
5210 {
5211 const uint unit = inst->Src[1].Register.Index;
5212 unsigned target = inst->Texture.Texture;
5213 unsigned opcode;
5214 int offsets[3];
5215 unsigned tmp = get_temp_index(emit);
5216 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
5217 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
5218 struct tgsi_full_src_register src0_wwww =
5219 scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
5220 struct tgsi_full_src_register coord;
5221 struct tex_swizzle_info swz_info;
5222
5223 /* check that the sampler returns a float */
5224 if (!is_valid_tex_instruction(emit, inst))
5225 return TRUE;
5226
5227 begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info);
5228
5229 get_texel_offsets(emit, inst, offsets);
5230
5231 coord = setup_texcoord(emit, unit, &inst->Src[0]);
5232
5233 /* DIV tmp, coord, s0.wwww */
5234 emit_instruction_op2(emit, VGPU10_OPCODE_DIV, &tmp_dst,
5235 &coord, &src0_wwww, FALSE);
5236
5237 /* SAMPLE dst, coord(tmp), resource, sampler */
5238 begin_emit_instruction(emit);
5239
5240 if (tgsi_is_shadow_target(target))
5241 opcode = VGPU10_OPCODE_SAMPLE_C;
5242 else
5243 opcode = VGPU10_OPCODE_SAMPLE;
5244
5245 emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets);
5246 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
5247 emit_src_register(emit, &tmp_src); /* projected coord */
5248 emit_resource_register(emit, unit);
5249 emit_sampler_register(emit, unit);
5250 if (opcode == VGPU10_OPCODE_SAMPLE_C) {
5251 emit_tex_compare_refcoord(emit, target, &tmp_src);
5252 }
5253 end_emit_instruction(emit);
5254
5255 end_tex_swizzle(emit, &swz_info);
5256
5257 free_temp_indexes(emit);
5258
5259 return TRUE;
5260 }
5261
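/* Editor's note: a minimal CPU-side sketch (illustrative only, not part of
 * the translator) of the projective divide that the DIV instruction in
 * emit_txp() performs before sampling.  The s/t/r coordinates live in xyz
 * and the projector q lives in w, per the TXP definition; the function name
 * below is hypothetical.
 */
static inline void
txp_project_coord_sketch(const float coord[4], float out[3])
{
   /* divide s/t/r by q; SAMPLE (or SAMPLE_C) then consumes the result */
   out[0] = coord[0] / coord[3];
   out[1] = coord[1] / coord[3];
   out[2] = coord[2] / coord[3];
}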
5262
5263 /*
5264 * Emit code for TGSI_OPCODE_XPD instruction.
5265 */
5266 static boolean
5267 emit_xpd(struct svga_shader_emitter_v10 *emit,
5268 const struct tgsi_full_instruction *inst)
5269 {
5270 /* dst.x = src0.y * src1.z - src1.y * src0.z
5271 * dst.y = src0.z * src1.x - src1.z * src0.x
5272 * dst.z = src0.x * src1.y - src1.x * src0.y
5273 * dst.w = 1
5274 */
5275 struct tgsi_full_src_register s0_xxxx =
5276 scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
5277 struct tgsi_full_src_register s0_yyyy =
5278 scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y);
5279 struct tgsi_full_src_register s0_zzzz =
5280 scalar_src(&inst->Src[0], TGSI_SWIZZLE_Z);
5281
5282 struct tgsi_full_src_register s1_xxxx =
5283 scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
5284 struct tgsi_full_src_register s1_yyyy =
5285 scalar_src(&inst->Src[1], TGSI_SWIZZLE_Y);
5286 struct tgsi_full_src_register s1_zzzz =
5287 scalar_src(&inst->Src[1], TGSI_SWIZZLE_Z);
5288
5289 unsigned tmp1 = get_temp_index(emit);
5290 struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1);
5291 struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1);
5292
5293 unsigned tmp2 = get_temp_index(emit);
5294 struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2);
5295 struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2);
5296 struct tgsi_full_src_register neg_tmp2_src = negate_src(&tmp2_src);
5297
5298 unsigned tmp3 = get_temp_index(emit);
5299 struct tgsi_full_src_register tmp3_src = make_src_temp_reg(tmp3);
5300 struct tgsi_full_dst_register tmp3_dst = make_dst_temp_reg(tmp3);
5301 struct tgsi_full_dst_register tmp3_dst_x =
5302 writemask_dst(&tmp3_dst, TGSI_WRITEMASK_X);
5303 struct tgsi_full_dst_register tmp3_dst_y =
5304 writemask_dst(&tmp3_dst, TGSI_WRITEMASK_Y);
5305 struct tgsi_full_dst_register tmp3_dst_z =
5306 writemask_dst(&tmp3_dst, TGSI_WRITEMASK_Z);
5307 struct tgsi_full_dst_register tmp3_dst_w =
5308 writemask_dst(&tmp3_dst, TGSI_WRITEMASK_W);
5309
5310 /* Note: we put all the intermediate computations into tmp3 in case
5311 * the XPD dest register is the same as one of the src regs (in which
5312 * case we could clobber a src reg before we're done with it).
5313 *
5314 * Note: we could get by with just one temp register instead of three
5315 * since we're doing scalar operations and there's enough room in one
5316 * temp for everything.
5317 */
5318
5319 /* MUL tmp1, src0.y, src1.z */
5320 /* MUL tmp2, src1.y, src0.z */
5321 /* ADD tmp3.x, tmp1, -tmp2 */
5322 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
5323 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst,
5324 &s0_yyyy, &s1_zzzz, FALSE);
5325 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp2_dst,
5326 &s1_yyyy, &s0_zzzz, FALSE);
5327 emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp3_dst_x,
5328 &tmp1_src, &neg_tmp2_src, FALSE);
5329 }
5330
5331 /* MUL tmp1, src0.z, src1.x */
5332 /* MUL tmp2, src1.z, src0.x */
5333 /* ADD tmp3.y, tmp1, -tmp2 */
5334 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
5335 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst, &s0_zzzz,
5336 &s1_xxxx, FALSE);
5337 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp2_dst, &s1_zzzz,
5338 &s0_xxxx, FALSE);
5339 emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp3_dst_y,
5340 &tmp1_src, &neg_tmp2_src, FALSE);
5341 }
5342
5343 /* MUL tmp1, src0.x, src1.y */
5344 /* MUL tmp2, src1.x, src0.y */
5345 /* ADD tmp3.z, tmp1, -tmp2 */
5346 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
5347 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst, &s0_xxxx,
5348 &s1_yyyy, FALSE);
5349 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp2_dst, &s1_xxxx,
5350 &s0_yyyy, FALSE);
5351 emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp3_dst_z,
5352 &tmp1_src, &neg_tmp2_src, FALSE);
5353 }
5354
5355 /* MOV tmp3.w, 1.0 */
5356 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
5357 struct tgsi_full_src_register one =
5358 make_immediate_reg_float(emit, 1.0f);
5359
5360 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &tmp3_dst_w, &one, FALSE);
5361 }
5362
5363 /* MOV dst, tmp3 */
5364 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &tmp3_src,
5365 inst->Instruction.Saturate);
5366
5367
5368 free_temp_indexes(emit);
5369
5370 return TRUE;
5371 }
5372
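/* Editor's note: a reference sketch (illustrative only, not used by the
 * translator) of the cross-product math that the MUL/ADD sequence in
 * emit_xpd() implements for each enabled writemask component.
 */
static inline void
xpd_reference_sketch(const float a[4], const float b[4], float dst[4])
{
   dst[0] = a[1] * b[2] - b[1] * a[2];   /* dst.x */
   dst[1] = a[2] * b[0] - b[2] * a[0];   /* dst.y */
   dst[2] = a[0] * b[1] - b[0] * a[1];   /* dst.z */
   dst[3] = 1.0f;                        /* dst.w */
}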
5373
5374 /**
5375 * Emit code for TGSI_OPCODE_TXD (explicit derivatives)
5376 */
5377 static boolean
5378 emit_txd(struct svga_shader_emitter_v10 *emit,
5379 const struct tgsi_full_instruction *inst)
5380 {
5381 const uint unit = inst->Src[3].Register.Index;
5382 unsigned target = inst->Texture.Texture;
5383 int offsets[3];
5384 struct tgsi_full_src_register coord;
5385 struct tex_swizzle_info swz_info;
5386
5387 begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target),
5388 &swz_info);
5389
5390 get_texel_offsets(emit, inst, offsets);
5391
5392 coord = setup_texcoord(emit, unit, &inst->Src[0]);
5393
5394 /* SAMPLE_D dst, coord(s0), resource, sampler, Xderiv(s1), Yderiv(s2) */
5395 begin_emit_instruction(emit);
5396 emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE_D,
5397 inst->Instruction.Saturate, offsets);
5398 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
5399 emit_src_register(emit, &coord);
5400 emit_resource_register(emit, unit);
5401 emit_sampler_register(emit, unit);
5402 emit_src_register(emit, &inst->Src[1]); /* Xderiv */
5403 emit_src_register(emit, &inst->Src[2]); /* Yderiv */
5404 end_emit_instruction(emit);
5405
5406 end_tex_swizzle(emit, &swz_info);
5407
5408 free_temp_indexes(emit);
5409
5410 return TRUE;
5411 }
5412
5413
5414 /**
5415 * Emit code for TGSI_OPCODE_TXF (texel fetch)
5416 */
5417 static boolean
5418 emit_txf(struct svga_shader_emitter_v10 *emit,
5419 const struct tgsi_full_instruction *inst)
5420 {
5421 const uint unit = inst->Src[1].Register.Index;
5422 const unsigned msaa = emit->key.tex[unit].texture_msaa;
5423 int offsets[3];
5424 struct tex_swizzle_info swz_info;
5425
5426 begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info);
5427
5428 get_texel_offsets(emit, inst, offsets);
5429
5430 if (msaa) {
5431 /* Fetch one sample from an MSAA texture */
5432 struct tgsi_full_src_register sampleIndex =
5433 scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
5434 /* LD_MS dst, coord(s0), resource, sampleIndex */
5435 begin_emit_instruction(emit);
5436 emit_sample_opcode(emit, VGPU10_OPCODE_LD_MS,
5437 inst->Instruction.Saturate, offsets);
5438 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
5439 emit_src_register(emit, &inst->Src[0]);
5440 emit_resource_register(emit, unit);
5441 emit_src_register(emit, &sampleIndex);
5442 end_emit_instruction(emit);
5443 }
5444 else {
5445 /* Fetch one texel specified by integer coordinate */
5446 /* LD dst, coord(s0), resource */
5447 begin_emit_instruction(emit);
5448 emit_sample_opcode(emit, VGPU10_OPCODE_LD,
5449 inst->Instruction.Saturate, offsets);
5450 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
5451 emit_src_register(emit, &inst->Src[0]);
5452 emit_resource_register(emit, unit);
5453 end_emit_instruction(emit);
5454 }
5455
5456 end_tex_swizzle(emit, &swz_info);
5457
5458 free_temp_indexes(emit);
5459
5460 return TRUE;
5461 }
5462
5463
5464 /**
5465 * Emit code for TGSI_OPCODE_TXL (explicit LOD) or TGSI_OPCODE_TXB (LOD bias)
5466 * or TGSI_OPCODE_TXB2 (for cube shadow maps).
5467 */
5468 static boolean
5469 emit_txl_txb(struct svga_shader_emitter_v10 *emit,
5470 const struct tgsi_full_instruction *inst)
5471 {
5472 unsigned target = inst->Texture.Texture;
5473 unsigned opcode, unit;
5474 int offsets[3];
5475 struct tgsi_full_src_register coord, lod_bias;
5476 struct tex_swizzle_info swz_info;
5477
5478 assert(inst->Instruction.Opcode == TGSI_OPCODE_TXL ||
5479 inst->Instruction.Opcode == TGSI_OPCODE_TXB ||
5480 inst->Instruction.Opcode == TGSI_OPCODE_TXB2);
5481
5482 if (inst->Instruction.Opcode == TGSI_OPCODE_TXB2) {
5483 lod_bias = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
5484 unit = inst->Src[2].Register.Index;
5485 }
5486 else {
5487 lod_bias = scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
5488 unit = inst->Src[1].Register.Index;
5489 }
5490
5491 begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target),
5492 &swz_info);
5493
5494 get_texel_offsets(emit, inst, offsets);
5495
5496 coord = setup_texcoord(emit, unit, &inst->Src[0]);
5497
5498 /* SAMPLE_L/B dst, coord(s0), resource, sampler, lod(s3) */
5499 begin_emit_instruction(emit);
5500 if (inst->Instruction.Opcode == TGSI_OPCODE_TXL) {
5501 opcode = VGPU10_OPCODE_SAMPLE_L;
5502 }
5503 else {
5504 opcode = VGPU10_OPCODE_SAMPLE_B;
5505 }
5506 emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets);
5507 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
5508 emit_src_register(emit, &coord);
5509 emit_resource_register(emit, unit);
5510 emit_sampler_register(emit, unit);
5511 emit_src_register(emit, &lod_bias);
5512 end_emit_instruction(emit);
5513
5514 end_tex_swizzle(emit, &swz_info);
5515
5516 free_temp_indexes(emit);
5517
5518 return TRUE;
5519 }
5520
5521
5522 /**
5523 * Emit code for TGSI_OPCODE_TXQ (texture query) instruction.
5524 */
5525 static boolean
5526 emit_txq(struct svga_shader_emitter_v10 *emit,
5527 const struct tgsi_full_instruction *inst)
5528 {
5529 const uint unit = inst->Src[1].Register.Index;
5530
5531 if (emit->key.tex[unit].texture_target == PIPE_BUFFER) {
5532 /* RESINFO does not support querying texture buffers, so we instead
5533 * store texture buffer sizes in shader constants and copy the right
5534 * one to the destination to implement TXQ.
5535 * MOV dst, const[texture_buffer_size_index[unit]]
5536 */
5537 struct tgsi_full_src_register size_src =
5538 make_src_const_reg(emit->texture_buffer_size_index[unit]);
5539 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &size_src,
5540 FALSE);
5541 } else {
5542 /* RESINFO dst, srcMipLevel, resource */
5543 begin_emit_instruction(emit);
5544 emit_opcode_resinfo(emit, VGPU10_RESINFO_RETURN_UINT);
5545 emit_dst_register(emit, &inst->Dst[0]);
5546 emit_src_register(emit, &inst->Src[0]);
5547 emit_resource_register(emit, unit);
5548 end_emit_instruction(emit);
5549 }
5550
5551 free_temp_indexes(emit);
5552
5553 return TRUE;
5554 }
5555
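/* Editor's note: a conceptual sketch (illustrative only) of the TXQ fallback
 * for texture buffers in emit_txq().  It assumes the driver uploads each
 * buffer's size into the constant slot recorded in
 * texture_buffer_size_index[]; the 'constants' array below is a hypothetical
 * stand-in for that constant buffer.
 */
static inline unsigned
txq_buffer_size_sketch(const unsigned constants[],
                       const unsigned texture_buffer_size_index[],
                       unsigned unit)
{
   /* MOV dst, const[texture_buffer_size_index[unit]] amounts to this lookup */
   return constants[texture_buffer_size_index[unit]];
}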
5556
5557 /**
5558 * Emit a simple instruction (like ADD, MUL, MIN, etc).
5559 */
5560 static boolean
5561 emit_simple(struct svga_shader_emitter_v10 *emit,
5562 const struct tgsi_full_instruction *inst)
5563 {
5564 const unsigned opcode = inst->Instruction.Opcode;
5565 const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode);
5566 unsigned i;
5567
5568 begin_emit_instruction(emit);
5569 emit_opcode(emit, translate_opcode(inst->Instruction.Opcode),
5570 inst->Instruction.Saturate);
5571 for (i = 0; i < op->num_dst; i++) {
5572 emit_dst_register(emit, &inst->Dst[i]);
5573 }
5574 for (i = 0; i < op->num_src; i++) {
5575 emit_src_register(emit, &inst->Src[i]);
5576 }
5577 end_emit_instruction(emit);
5578
5579 return TRUE;
5580 }
5581
5582
5583 /**
5584 * We only special case the MOV instruction to try to detect constant
5585 * color writes in the fragment shader.
5586 */
5587 static boolean
5588 emit_mov(struct svga_shader_emitter_v10 *emit,
5589 const struct tgsi_full_instruction *inst)
5590 {
5591 const struct tgsi_full_src_register *src = &inst->Src[0];
5592 const struct tgsi_full_dst_register *dst = &inst->Dst[0];
5593
5594 if (emit->unit == PIPE_SHADER_FRAGMENT &&
5595 dst->Register.File == TGSI_FILE_OUTPUT &&
5596 dst->Register.Index == 0 &&
5597 src->Register.File == TGSI_FILE_CONSTANT &&
5598 !src->Register.Indirect) {
5599 emit->constant_color_output = TRUE;
5600 }
5601
5602 return emit_simple(emit, inst);
5603 }
5604
5605
5606 /**
5607 * Emit a simple VGPU10 instruction which writes to multiple dest registers,
5608 * where TGSI only uses one dest register.
5609 */
5610 static boolean
5611 emit_simple_1dst(struct svga_shader_emitter_v10 *emit,
5612 const struct tgsi_full_instruction *inst,
5613 unsigned dst_count,
5614 unsigned dst_index)
5615 {
5616 const unsigned opcode = inst->Instruction.Opcode;
5617 const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode);
5618 unsigned i;
5619
5620 begin_emit_instruction(emit);
5621 emit_opcode(emit, translate_opcode(inst->Instruction.Opcode),
5622 inst->Instruction.Saturate);
5623
5624 for (i = 0; i < dst_count; i++) {
5625 if (i == dst_index) {
5626 emit_dst_register(emit, &inst->Dst[0]);
5627 } else {
5628 emit_null_dst_register(emit);
5629 }
5630 }
5631
5632 for (i = 0; i < op->num_src; i++) {
5633 emit_src_register(emit, &inst->Src[i]);
5634 }
5635 end_emit_instruction(emit);
5636
5637 return TRUE;
5638 }
5639
5640
5641 /**
5642 * Translate a single TGSI instruction to VGPU10.
5643 */
5644 static boolean
5645 emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit,
5646 unsigned inst_number,
5647 const struct tgsi_full_instruction *inst)
5648 {
5649 const unsigned opcode = inst->Instruction.Opcode;
5650
5651 switch (opcode) {
5652 case TGSI_OPCODE_ADD:
5653 case TGSI_OPCODE_AND:
5654 case TGSI_OPCODE_BGNLOOP:
5655 case TGSI_OPCODE_BRK:
5656 case TGSI_OPCODE_CEIL:
5657 case TGSI_OPCODE_CONT:
5658 case TGSI_OPCODE_DDX:
5659 case TGSI_OPCODE_DDY:
5660 case TGSI_OPCODE_DIV:
5661 case TGSI_OPCODE_DP2:
5662 case TGSI_OPCODE_DP3:
5663 case TGSI_OPCODE_DP4:
5664 case TGSI_OPCODE_ELSE:
5665 case TGSI_OPCODE_ENDIF:
5666 case TGSI_OPCODE_ENDLOOP:
5667 case TGSI_OPCODE_ENDSUB:
5668 case TGSI_OPCODE_F2I:
5669 case TGSI_OPCODE_F2U:
5670 case TGSI_OPCODE_FLR:
5671 case TGSI_OPCODE_FRC:
5672 case TGSI_OPCODE_FSEQ:
5673 case TGSI_OPCODE_FSGE:
5674 case TGSI_OPCODE_FSLT:
5675 case TGSI_OPCODE_FSNE:
5676 case TGSI_OPCODE_I2F:
5677 case TGSI_OPCODE_IMAX:
5678 case TGSI_OPCODE_IMIN:
5679 case TGSI_OPCODE_INEG:
5680 case TGSI_OPCODE_ISGE:
5681 case TGSI_OPCODE_ISHR:
5682 case TGSI_OPCODE_ISLT:
5683 case TGSI_OPCODE_MAD:
5684 case TGSI_OPCODE_MAX:
5685 case TGSI_OPCODE_MIN:
5686 case TGSI_OPCODE_MUL:
5687 case TGSI_OPCODE_NOP:
5688 case TGSI_OPCODE_NOT:
5689 case TGSI_OPCODE_OR:
5690 case TGSI_OPCODE_RET:
5691 case TGSI_OPCODE_UADD:
5692 case TGSI_OPCODE_USEQ:
5693 case TGSI_OPCODE_USGE:
5694 case TGSI_OPCODE_USLT:
5695 case TGSI_OPCODE_UMIN:
5696 case TGSI_OPCODE_UMAD:
5697 case TGSI_OPCODE_UMAX:
5698 case TGSI_OPCODE_ROUND:
5699 case TGSI_OPCODE_SQRT:
5700 case TGSI_OPCODE_SHL:
5701 case TGSI_OPCODE_TRUNC:
5702 case TGSI_OPCODE_U2F:
5703 case TGSI_OPCODE_UCMP:
5704 case TGSI_OPCODE_USHR:
5705 case TGSI_OPCODE_USNE:
5706 case TGSI_OPCODE_XOR:
5707 /* simple instructions */
5708 return emit_simple(emit, inst);
5709
5710 case TGSI_OPCODE_MOV:
5711 return emit_mov(emit, inst);
5712 case TGSI_OPCODE_EMIT:
5713 return emit_vertex(emit, inst);
5714 case TGSI_OPCODE_ENDPRIM:
5715 return emit_endprim(emit, inst);
5716 case TGSI_OPCODE_ABS:
5717 return emit_abs(emit, inst);
5718 case TGSI_OPCODE_IABS:
5719 return emit_iabs(emit, inst);
5720 case TGSI_OPCODE_ARL:
5721 /* fall-through */
5722 case TGSI_OPCODE_UARL:
5723 return emit_arl_uarl(emit, inst);
5724 case TGSI_OPCODE_BGNSUB:
5725 /* no-op */
5726 return TRUE;
5727 case TGSI_OPCODE_CAL:
5728 return emit_cal(emit, inst);
5729 case TGSI_OPCODE_CMP:
5730 return emit_cmp(emit, inst);
5731 case TGSI_OPCODE_COS:
5732 return emit_sincos(emit, inst);
5733 case TGSI_OPCODE_DP2A:
5734 return emit_dp2a(emit, inst);
5735 case TGSI_OPCODE_DPH:
5736 return emit_dph(emit, inst);
5737 case TGSI_OPCODE_DST:
5738 return emit_dst(emit, inst);
5739 case TGSI_OPCODE_EX2:
5740 return emit_ex2(emit, inst);
5741 case TGSI_OPCODE_EXP:
5742 return emit_exp(emit, inst);
5743 case TGSI_OPCODE_IF:
5744 return emit_if(emit, inst);
5745 case TGSI_OPCODE_KILL:
5746 return emit_kill(emit, inst);
5747 case TGSI_OPCODE_KILL_IF:
5748 return emit_kill_if(emit, inst);
5749 case TGSI_OPCODE_LG2:
5750 return emit_lg2(emit, inst);
5751 case TGSI_OPCODE_LIT:
5752 return emit_lit(emit, inst);
5753 case TGSI_OPCODE_LOG:
5754 return emit_log(emit, inst);
5755 case TGSI_OPCODE_LRP:
5756 return emit_lrp(emit, inst);
5757 case TGSI_OPCODE_POW:
5758 return emit_pow(emit, inst);
5759 case TGSI_OPCODE_RCP:
5760 return emit_rcp(emit, inst);
5761 case TGSI_OPCODE_RSQ:
5762 return emit_rsq(emit, inst);
5763 case TGSI_OPCODE_SAMPLE:
5764 return emit_sample(emit, inst);
5765 case TGSI_OPCODE_SCS:
5766 return emit_scs(emit, inst);
5767 case TGSI_OPCODE_SEQ:
5768 return emit_seq(emit, inst);
5769 case TGSI_OPCODE_SGE:
5770 return emit_sge(emit, inst);
5771 case TGSI_OPCODE_SGT:
5772 return emit_sgt(emit, inst);
5773 case TGSI_OPCODE_SIN:
5774 return emit_sincos(emit, inst);
5775 case TGSI_OPCODE_SLE:
5776 return emit_sle(emit, inst);
5777 case TGSI_OPCODE_SLT:
5778 return emit_slt(emit, inst);
5779 case TGSI_OPCODE_SNE:
5780 return emit_sne(emit, inst);
5781 case TGSI_OPCODE_SSG:
5782 return emit_ssg(emit, inst);
5783 case TGSI_OPCODE_ISSG:
5784 return emit_issg(emit, inst);
5785 case TGSI_OPCODE_SUB:
5786 return emit_sub(emit, inst);
5787 case TGSI_OPCODE_TEX:
5788 return emit_tex(emit, inst);
5789 case TGSI_OPCODE_TXP:
5790 return emit_txp(emit, inst);
5791 case TGSI_OPCODE_TXB:
5792 case TGSI_OPCODE_TXB2:
5793 case TGSI_OPCODE_TXL:
5794 return emit_txl_txb(emit, inst);
5795 case TGSI_OPCODE_TXD:
5796 return emit_txd(emit, inst);
5797 case TGSI_OPCODE_TXF:
5798 return emit_txf(emit, inst);
5799 case TGSI_OPCODE_TXQ:
5800 return emit_txq(emit, inst);
5801 case TGSI_OPCODE_UIF:
5802 return emit_if(emit, inst);
5803 case TGSI_OPCODE_XPD:
5804 return emit_xpd(emit, inst);
5805 case TGSI_OPCODE_UMUL_HI:
5806 case TGSI_OPCODE_IMUL_HI:
5807 case TGSI_OPCODE_UDIV:
5808 case TGSI_OPCODE_IDIV:
5809 /* These cases use only the FIRST of two destination registers */
5810 return emit_simple_1dst(emit, inst, 2, 0);
5811 case TGSI_OPCODE_UMUL:
5812 case TGSI_OPCODE_UMOD:
5813 case TGSI_OPCODE_MOD:
5814 /* These cases use only the SECOND of two destination registers */
5815 return emit_simple_1dst(emit, inst, 2, 1);
5816 case TGSI_OPCODE_END:
5817 if (!emit_post_helpers(emit))
5818 return FALSE;
5819 return emit_simple(emit, inst);
5820
5821 default:
5822 debug_printf("Unimplemented tgsi instruction %s\n",
5823 tgsi_get_opcode_name(opcode));
5824 return FALSE;
5825 }
5826
5827 return TRUE;
5828 }
5829
5830
5831 /**
5832 * Emit the extra instructions to adjust the vertex position.
5833 * There are two possible adjustments:
5834 * 1. Converting from Gallium to VGPU10 coordinate space by applying the
5835 * "prescale" and "pretranslate" values.
5836 * 2. Undoing the viewport transformation when we use the swtnl/draw path.
5837 * \param vs_pos_tmp_index which temporary register contains the vertex pos.
5838 */
5839 static void
5840 emit_vpos_instructions(struct svga_shader_emitter_v10 *emit,
5841 unsigned vs_pos_tmp_index)
5842 {
5843 struct tgsi_full_src_register tmp_pos_src;
5844 struct tgsi_full_dst_register pos_dst;
5845
5846 /* Don't bother to emit any extra vertex instructions if vertex position is
5847 * not written out
5848 */
5849 if (emit->vposition.out_index == INVALID_INDEX)
5850 return;
5851
5852 tmp_pos_src = make_src_temp_reg(vs_pos_tmp_index);
5853 pos_dst = make_dst_output_reg(emit->vposition.out_index);
5854
5855 /* If the non-adjusted vertex position register index
5856 * is valid, copy the vertex position from the temporary
5857 * vertex position register before it is modified by the
5858 * prescale computation.
5859 */
5860 if (emit->vposition.so_index != INVALID_INDEX) {
5861 struct tgsi_full_dst_register pos_so_dst =
5862 make_dst_output_reg(emit->vposition.so_index);
5863
5864 /* MOV pos_so, tmp_pos */
5865 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &pos_so_dst,
5866 &tmp_pos_src, FALSE);
5867 }
5868
5869 if (emit->vposition.need_prescale) {
5870 /* This code adjusts the vertex position to match the VGPU10 convention.
5871 * If p is the position computed by the shader (usually by applying the
5872 * modelview and projection matrices), the new position q is computed by:
5873 *
5874 * q.x = p.w * trans.x + p.x * scale.x
5875 * q.y = p.w * trans.y + p.y * scale.y
5876 * q.z = p.w * trans.z + p.z * scale.z;
5877 * q.w = p.w * trans.w + p.w;
5878 */
5879 struct tgsi_full_src_register tmp_pos_src_w =
5880 scalar_src(&tmp_pos_src, TGSI_SWIZZLE_W);
5881 struct tgsi_full_dst_register tmp_pos_dst =
5882 make_dst_temp_reg(vs_pos_tmp_index);
5883 struct tgsi_full_dst_register tmp_pos_dst_xyz =
5884 writemask_dst(&tmp_pos_dst, TGSI_WRITEMASK_XYZ);
5885
5886 struct tgsi_full_src_register prescale_scale =
5887 make_src_const_reg(emit->vposition.prescale_scale_index);
5888 struct tgsi_full_src_register prescale_trans =
5889 make_src_const_reg(emit->vposition.prescale_trans_index);
5890
5891 /* MUL tmp_pos.xyz, tmp_pos, prescale.scale */
5892 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_pos_dst_xyz,
5893 &tmp_pos_src, &prescale_scale, FALSE);
5894
5895 /* MAD pos, tmp_pos.wwww, prescale.trans, tmp_pos */
5896 emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &pos_dst, &tmp_pos_src_w,
5897 &prescale_trans, &tmp_pos_src, FALSE);
5898 }
5899 else if (emit->key.vs.undo_viewport) {
5900 /* This code computes the final vertex position from the temporary
5901 * vertex position by undoing the viewport transformation and the
5902 * divide-by-W operation (we convert window coords back to clip coords).
5903 * This is needed when we use the 'draw' module for fallbacks.
5904 * If p is the temp pos in window coords, then the NDC coord q is:
5905 * q.x = (p.x - vp.x_trans) / vp.x_scale * p.w
5906 * q.y = (p.y - vp.y_trans) / vp.y_scale * p.w
5907 * q.z = p.z * p.w
5908 * q.w = p.w
5909 * CONST[vs_viewport_index] contains:
5910 * { 1/vp.x_scale, 1/vp.y_scale, -vp.x_trans, -vp.y_trans }
5911 */
5912 struct tgsi_full_dst_register tmp_pos_dst =
5913 make_dst_temp_reg(vs_pos_tmp_index);
5914 struct tgsi_full_dst_register tmp_pos_dst_xy =
5915 writemask_dst(&tmp_pos_dst, TGSI_WRITEMASK_XY);
5916 struct tgsi_full_src_register tmp_pos_src_wwww =
5917 scalar_src(&tmp_pos_src, TGSI_SWIZZLE_W);
5918
5919 struct tgsi_full_dst_register pos_dst_xyz =
5920 writemask_dst(&pos_dst, TGSI_WRITEMASK_XYZ);
5921 struct tgsi_full_dst_register pos_dst_w =
5922 writemask_dst(&pos_dst, TGSI_WRITEMASK_W);
5923
5924 struct tgsi_full_src_register vp_xyzw =
5925 make_src_const_reg(emit->vs.viewport_index);
5926 struct tgsi_full_src_register vp_zwww =
5927 swizzle_src(&vp_xyzw, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W,
5928 TGSI_SWIZZLE_W, TGSI_SWIZZLE_W);
5929
5930 /* ADD tmp_pos.xy, tmp_pos.xy, viewport.zwww */
5931 emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp_pos_dst_xy,
5932 &tmp_pos_src, &vp_zwww, FALSE);
5933
5934 /* MUL tmp_pos.xy, tmp_pos.xyzw, viewport.xyzy */
5935 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_pos_dst_xy,
5936 &tmp_pos_src, &vp_xyzw, FALSE);
5937
5938 /* MUL pos.xyz, tmp_pos.xyz, tmp_pos.www */
5939 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &pos_dst_xyz,
5940 &tmp_pos_src, &tmp_pos_src_wwww, FALSE);
5941
5942 /* MOV pos.w, tmp_pos.w */
5943 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &pos_dst_w,
5944 &tmp_pos_src, FALSE);
5945 }
5946 else if (vs_pos_tmp_index != INVALID_INDEX) {
5947 /* This code is to handle the case where the temporary vertex
5948 * position register is created when the vertex shader has stream
5949 * output and prescale is disabled because rasterization is to be
5950 * discarded.
5951 */
5952 struct tgsi_full_dst_register pos_dst =
5953 make_dst_output_reg(emit->vposition.out_index);
5954
5955 /* MOV pos, tmp_pos */
5956 begin_emit_instruction(emit);
5957 emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE);
5958 emit_dst_register(emit, &pos_dst);
5959 emit_src_register(emit, &tmp_pos_src);
5960 end_emit_instruction(emit);
5961 }
5962 }
5963
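/* Editor's note: a CPU-side reference (illustrative only) of the two position
 * adjustments emitted by emit_vpos_instructions() above.  'scale'/'trans'
 * mirror the prescale constants and 'vp' mirrors CONST[vs_viewport_index] as
 * documented in the comments inside that function; all names below are
 * hypothetical.
 */
static inline void
prescale_position_sketch(const float p[4], const float scale[4],
                         const float trans[4], float q[4])
{
   q[0] = p[3] * trans[0] + p[0] * scale[0];
   q[1] = p[3] * trans[1] + p[1] * scale[1];
   q[2] = p[3] * trans[2] + p[2] * scale[2];
   q[3] = p[3] * trans[3] + p[3];
}

static inline void
undo_viewport_sketch(const float p[4], const float vp[4], float q[4])
{
   /* vp = { 1/vp.x_scale, 1/vp.y_scale, -vp.x_trans, -vp.y_trans } */
   q[0] = (p[0] + vp[2]) * vp[0] * p[3];
   q[1] = (p[1] + vp[3]) * vp[1] * p[3];
   q[2] = p[2] * p[3];
   q[3] = p[3];
}
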
5964 static void
5965 emit_clipping_instructions(struct svga_shader_emitter_v10 *emit)
5966 {
5967 if (emit->clip_mode == CLIP_DISTANCE) {
5968 /* Copy from the clip distance temporary to CLIPDIST & the shadow copy */
5969 emit_clip_distance_instructions(emit);
5970
5971 } else if (emit->clip_mode == CLIP_VERTEX) {
5972 /* Convert TGSI CLIPVERTEX to CLIPDIST */
5973 emit_clip_vertex_instructions(emit);
5974 }
5975
5976 /**
5977 * Emit vertex position and take care of legacy user planes only if
5978 * there is a valid vertex position register index.
5979 * This handles the case where the shader doesn't output a vertex
5980 * position at all; in that case, don't bother to emit any more
5981 * vertex instructions.
5982 */
5983 if (emit->vposition.out_index == INVALID_INDEX)
5984 return;
5985
5986 /**
5987 * Emit per-vertex clipping instructions for legacy user defined clip planes.
5988 * NOTE: we must emit the clip distance instructions before the
5989 * emit_vpos_instructions() call since the later function will change
5990 * the TEMP[vs_pos_tmp_index] value.
5991 */
5992 if (emit->clip_mode == CLIP_LEGACY) {
5993 /* Emit CLIPDIST for legacy user defined clip planes */
5994 emit_clip_distance_from_vpos(emit, emit->vposition.tmp_index);
5995 }
5996 }
5997
5998
5999 /**
6000 * Emit extra per-vertex instructions. This includes clip-coordinate
6001 * space conversion and computing clip distances. This is called for
6002 * each GS emit-vertex instruction and at the end of VS translation.
6003 */
6004 static void
6005 emit_vertex_instructions(struct svga_shader_emitter_v10 *emit)
6006 {
6007 const unsigned vs_pos_tmp_index = emit->vposition.tmp_index;
6008
6009 /* Emit clipping instructions based on clipping mode */
6010 emit_clipping_instructions(emit);
6011
6012 /**
6013 * Reset the temporary vertex position register index
6014 * so that emit_dst_register() will use the real vertex position output
6015 */
6016 emit->vposition.tmp_index = INVALID_INDEX;
6017
6018 /* Emit vertex position instructions */
6019 emit_vpos_instructions(emit, vs_pos_tmp_index);
6020
6021 /* Restore original vposition.tmp_index value for the next GS vertex.
6022 * It doesn't matter for VS.
6023 */
6024 emit->vposition.tmp_index = vs_pos_tmp_index;
6025 }
6026
6027 /**
6028 * Translate the TGSI_OPCODE_EMIT GS instruction.
6029 */
6030 static boolean
6031 emit_vertex(struct svga_shader_emitter_v10 *emit,
6032 const struct tgsi_full_instruction *inst)
6033 {
6034 unsigned ret = TRUE;
6035
6036 assert(emit->unit == PIPE_SHADER_GEOMETRY);
6037
6038 emit_vertex_instructions(emit);
6039
6040 /* We can't use emit_simple() because the TGSI instruction has one
6041 * operand (vertex stream number) which we must ignore for VGPU10.
6042 */
6043 begin_emit_instruction(emit);
6044 emit_opcode(emit, VGPU10_OPCODE_EMIT, FALSE);
6045 end_emit_instruction(emit);
6046
6047 return ret;
6048 }
6049
6050
6051 /**
6052 * Emit the extra code to convert from VGPU10's boolean front-face
6053 * register to TGSI's signed front-face register.
6054 *
6055 * TODO: Make temporary front-face register a scalar.
6056 */
6057 static void
6058 emit_frontface_instructions(struct svga_shader_emitter_v10 *emit)
6059 {
6060 assert(emit->unit == PIPE_SHADER_FRAGMENT);
6061
6062 if (emit->fs.face_input_index != INVALID_INDEX) {
6063 /* convert vgpu10 boolean face register to gallium +/-1 value */
6064 struct tgsi_full_dst_register tmp_dst =
6065 make_dst_temp_reg(emit->fs.face_tmp_index);
6066 struct tgsi_full_src_register one =
6067 make_immediate_reg_float(emit, 1.0f);
6068 struct tgsi_full_src_register neg_one =
6069 make_immediate_reg_float(emit, -1.0f);
6070
6071 /* MOVC face_tmp, IS_FRONT_FACE.x, 1.0, -1.0 */
6072 begin_emit_instruction(emit);
6073 emit_opcode(emit, VGPU10_OPCODE_MOVC, FALSE);
6074 emit_dst_register(emit, &tmp_dst);
6075 emit_face_register(emit);
6076 emit_src_register(emit, &one);
6077 emit_src_register(emit, &neg_one);
6078 end_emit_instruction(emit);
6079 }
6080 }
6081
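/* Editor's note: a scalar sketch (illustrative only) of the MOVC emitted by
 * emit_frontface_instructions(): VGPU10's boolean front-face flag is mapped
 * to the +1.0 / -1.0 convention that TGSI shaders expect.
 */
static inline float
frontface_to_tgsi_sketch(unsigned is_front_face)
{
   return is_front_face ? 1.0f : -1.0f;
}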
6082
6083 /**
6084 * Emit the extra code to convert from VGPU10's fragcoord.w value to 1/w.
6085 */
6086 static void
6087 emit_fragcoord_instructions(struct svga_shader_emitter_v10 *emit)
6088 {
6089 assert(emit->unit == PIPE_SHADER_FRAGMENT);
6090
6091 if (emit->fs.fragcoord_input_index != INVALID_INDEX) {
6092 struct tgsi_full_dst_register tmp_dst =
6093 make_dst_temp_reg(emit->fs.fragcoord_tmp_index);
6094 struct tgsi_full_dst_register tmp_dst_xyz =
6095 writemask_dst(&tmp_dst, TGSI_WRITEMASK_XYZ);
6096 struct tgsi_full_dst_register tmp_dst_w =
6097 writemask_dst(&tmp_dst, TGSI_WRITEMASK_W);
6098 struct tgsi_full_src_register one =
6099 make_immediate_reg_float(emit, 1.0f);
6100 struct tgsi_full_src_register fragcoord =
6101 make_src_reg(TGSI_FILE_INPUT, emit->fs.fragcoord_input_index);
6102
6103 /* save the input index */
6104 unsigned fragcoord_input_index = emit->fs.fragcoord_input_index;
6105 /* set to invalid to prevent substitution in emit_src_register() */
6106 emit->fs.fragcoord_input_index = INVALID_INDEX;
6107
6108 /* MOV fragcoord_tmp.xyz, fragcoord.xyz */
6109 begin_emit_instruction(emit);
6110 emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE);
6111 emit_dst_register(emit, &tmp_dst_xyz);
6112 emit_src_register(emit, &fragcoord);
6113 end_emit_instruction(emit);
6114
6115 /* DIV fragcoord_tmp.w, 1.0, fragcoord.w */
6116 begin_emit_instruction(emit);
6117 emit_opcode(emit, VGPU10_OPCODE_DIV, FALSE);
6118 emit_dst_register(emit, &tmp_dst_w);
6119 emit_src_register(emit, &one);
6120 emit_src_register(emit, &fragcoord);
6121 end_emit_instruction(emit);
6122
6123 /* restore saved value */
6124 emit->fs.fragcoord_input_index = fragcoord_input_index;
6125 }
6126 }
6127
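/* Editor's note: a reference sketch (illustrative only) of the fragcoord
 * adjustment above: TGSI expects fragcoord.w to hold 1/w, while VGPU10
 * supplies the raw w value, so only the w component needs fixing up.
 */
static inline void
fragcoord_to_tgsi_sketch(const float vgpu10_fragcoord[4],
                         float tgsi_fragcoord[4])
{
   tgsi_fragcoord[0] = vgpu10_fragcoord[0];
   tgsi_fragcoord[1] = vgpu10_fragcoord[1];
   tgsi_fragcoord[2] = vgpu10_fragcoord[2];
   tgsi_fragcoord[3] = 1.0f / vgpu10_fragcoord[3];
}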
6128
6129 /**
6130 * Emit extra instructions to adjust VS inputs/attributes. This can
6131 * mean casting a vertex attribute from int to float or setting the
6132 * W component to 1, or both.
6133 */
6134 static void
6135 emit_vertex_attrib_instructions(struct svga_shader_emitter_v10 *emit)
6136 {
6137 const unsigned save_w_1_mask = emit->key.vs.adjust_attrib_w_1;
6138 const unsigned save_itof_mask = emit->key.vs.adjust_attrib_itof;
6139 const unsigned save_utof_mask = emit->key.vs.adjust_attrib_utof;
6140 const unsigned save_is_bgra_mask = emit->key.vs.attrib_is_bgra;
6141 const unsigned save_puint_to_snorm_mask = emit->key.vs.attrib_puint_to_snorm;
6142 const unsigned save_puint_to_uscaled_mask = emit->key.vs.attrib_puint_to_uscaled;
6143 const unsigned save_puint_to_sscaled_mask = emit->key.vs.attrib_puint_to_sscaled;
6144
6145 unsigned adjust_mask = (save_w_1_mask |
6146 save_itof_mask |
6147 save_utof_mask |
6148 save_is_bgra_mask |
6149 save_puint_to_snorm_mask |
6150 save_puint_to_uscaled_mask |
6151 save_puint_to_sscaled_mask);
6152
6153 assert(emit->unit == PIPE_SHADER_VERTEX);
6154
6155 if (adjust_mask) {
6156 struct tgsi_full_src_register one =
6157 make_immediate_reg_float(emit, 1.0f);
6158
6159 struct tgsi_full_src_register one_int =
6160 make_immediate_reg_int(emit, 1);
6161
6162 /* We need to turn off these bitmasks while emitting the
6163 * instructions below, then restore them afterward.
6164 */
6165 emit->key.vs.adjust_attrib_w_1 = 0;
6166 emit->key.vs.adjust_attrib_itof = 0;
6167 emit->key.vs.adjust_attrib_utof = 0;
6168 emit->key.vs.attrib_is_bgra = 0;
6169 emit->key.vs.attrib_puint_to_snorm = 0;
6170 emit->key.vs.attrib_puint_to_uscaled = 0;
6171 emit->key.vs.attrib_puint_to_sscaled = 0;
6172
6173 while (adjust_mask) {
6174 unsigned index = u_bit_scan(&adjust_mask);
6175
6176 /* skip the instruction if this vertex attribute is not being used */
6177 if (emit->info.input_usage_mask[index] == 0)
6178 continue;
6179
6180 unsigned tmp = emit->vs.adjusted_input[index];
6181 struct tgsi_full_src_register input_src =
6182 make_src_reg(TGSI_FILE_INPUT, index);
6183
6184 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
6185 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
6186 struct tgsi_full_dst_register tmp_dst_w =
6187 writemask_dst(&tmp_dst, TGSI_WRITEMASK_W);
6188
6189 /* ITOF/UTOF/MOV tmp, input[index] */
6190 if (save_itof_mask & (1 << index)) {
6191 emit_instruction_op1(emit, VGPU10_OPCODE_ITOF,
6192 &tmp_dst, &input_src, FALSE);
6193 }
6194 else if (save_utof_mask & (1 << index)) {
6195 emit_instruction_op1(emit, VGPU10_OPCODE_UTOF,
6196 &tmp_dst, &input_src, FALSE);
6197 }
6198 else if (save_puint_to_snorm_mask & (1 << index)) {
6199 emit_puint_to_snorm(emit, &tmp_dst, &input_src);
6200 }
6201 else if (save_puint_to_uscaled_mask & (1 << index)) {
6202 emit_puint_to_uscaled(emit, &tmp_dst, &input_src);
6203 }
6204 else if (save_puint_to_sscaled_mask & (1 << index)) {
6205 emit_puint_to_sscaled(emit, &tmp_dst, &input_src);
6206 }
6207 else {
6208 assert((save_w_1_mask | save_is_bgra_mask) & (1 << index));
6209 emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
6210 &tmp_dst, &input_src, FALSE);
6211 }
6212
6213 if (save_is_bgra_mask & (1 << index)) {
6214 emit_swap_r_b(emit, &tmp_dst, &tmp_src);
6215 }
6216
6217 if (save_w_1_mask & (1 << index)) {
6218 /* MOV tmp.w, 1.0 */
6219 if (emit->key.vs.attrib_is_pure_int & (1 << index)) {
6220 emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
6221 &tmp_dst_w, &one_int, FALSE);
6222 }
6223 else {
6224 emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
6225 &tmp_dst_w, &one, FALSE);
6226 }
6227 }
6228 }
6229
6230 emit->key.vs.adjust_attrib_w_1 = save_w_1_mask;
6231 emit->key.vs.adjust_attrib_itof = save_itof_mask;
6232 emit->key.vs.adjust_attrib_utof = save_utof_mask;
6233 emit->key.vs.attrib_is_bgra = save_is_bgra_mask;
6234 emit->key.vs.attrib_puint_to_snorm = save_puint_to_snorm_mask;
6235 emit->key.vs.attrib_puint_to_uscaled = save_puint_to_uscaled_mask;
6236 emit->key.vs.attrib_puint_to_sscaled = save_puint_to_sscaled_mask;
6237 }
6238 }
6239
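/* Editor's note: a CPU-side sketch (illustrative only) of two of the vertex
 * attribute fix-ups emitted above: casting a signed integer attribute to
 * float (the ITOF case) and forcing W to 1.0 for formats that have no W
 * channel (the adjust_attrib_w_1 case).  The function name is hypothetical.
 */
static inline void
itof_and_w1_attrib_sketch(const int attrib[4], float out[4])
{
   out[0] = (float) attrib[0];   /* ITOF tmp.xyz, input.xyz */
   out[1] = (float) attrib[1];
   out[2] = (float) attrib[2];
   out[3] = 1.0f;                /* MOV tmp.w, 1.0 */
}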
6240
6241 /**
6242 * Some common values like 0.0, 1.0, 0.5, etc. are frequently needed
6243 * to implement some instructions. We pre-allocate those values here
6244 * in the immediate constant buffer.
6245 */
6246 static void
6247 alloc_common_immediates(struct svga_shader_emitter_v10 *emit)
6248 {
6249 unsigned n = 0;
6250
6251 emit->common_immediate_pos[n++] =
6252 alloc_immediate_float4(emit, 0.0f, 1.0f, 0.5f, -1.0f);
6253
6254 emit->common_immediate_pos[n++] =
6255 alloc_immediate_float4(emit, 128.0f, -128.0f, 2.0f, 3.0f);
6256
6257 emit->common_immediate_pos[n++] =
6258 alloc_immediate_int4(emit, 0, 1, 0, -1);
6259
6260 if (emit->key.vs.attrib_puint_to_snorm) {
6261 emit->common_immediate_pos[n++] =
6262 alloc_immediate_float4(emit, -2.0f, -2.0f, -2.0f, -1.66666f);
6263 }
6264
6265 if (emit->key.vs.attrib_puint_to_uscaled) {
6266 emit->common_immediate_pos[n++] =
6267 alloc_immediate_float4(emit, 1023.0f, 3.0f, 0.0f, 0.0f);
6268 }
6269
6270 if (emit->key.vs.attrib_puint_to_sscaled) {
6271 emit->common_immediate_pos[n++] =
6272 alloc_immediate_int4(emit, 22, 12, 2, 0);
6273
6274 emit->common_immediate_pos[n++] =
6275 alloc_immediate_int4(emit, 22, 30, 0, 0);
6276 }
6277
6278 assert(n <= Elements(emit->common_immediate_pos));
6279 emit->num_common_immediates = n;
6280 }
6281
6282
6283 /**
6284 * Emit any extra/helper declarations/code that we might need between
6285 * the declaration section and code section.
6286 */
6287 static boolean
6288 emit_pre_helpers(struct svga_shader_emitter_v10 *emit)
6289 {
6290 /* Properties */
6291 if (emit->unit == PIPE_SHADER_GEOMETRY)
6292 emit_property_instructions(emit);
6293
6294 /* Declare inputs */
6295 if (!emit_input_declarations(emit))
6296 return FALSE;
6297
6298 /* Declare outputs */
6299 if (!emit_output_declarations(emit))
6300 return FALSE;
6301
6302 /* Declare temporary registers */
6303 emit_temporaries_declaration(emit);
6304
6305 /* Declare constant registers */
6306 emit_constant_declaration(emit);
6307
6308 /* Declare samplers and resources */
6309 emit_sampler_declarations(emit);
6310 emit_resource_declarations(emit);
6311
6312 /* Declare clip distance output registers */
6313 if (emit->unit == PIPE_SHADER_VERTEX ||
6314 emit->unit == PIPE_SHADER_GEOMETRY) {
6315 emit_clip_distance_declarations(emit);
6316 }
6317
6318 alloc_common_immediates(emit);
6319
6320 if (emit->unit == PIPE_SHADER_FRAGMENT &&
6321 emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS) {
6322 float alpha = emit->key.fs.alpha_ref;
6323 emit->fs.alpha_ref_index =
6324 alloc_immediate_float4(emit, alpha, alpha, alpha, alpha);
6325 }
6326
6327 /* Now, emit the constant block containing all the immediates
6328 * declared by shader, as well as the extra ones seen above.
6329 */
6330 emit_vgpu10_immediates_block(emit);
6331
6332 if (emit->unit == PIPE_SHADER_FRAGMENT) {
6333 emit_frontface_instructions(emit);
6334 emit_fragcoord_instructions(emit);
6335 }
6336 else if (emit->unit == PIPE_SHADER_VERTEX) {
6337 emit_vertex_attrib_instructions(emit);
6338 }
6339
6340 return TRUE;
6341 }
6342
6343
6344 /**
6345 * Emit alpha test code. This compares TEMP[fs_color_tmp_index].w
6346 * against the alpha reference value and discards the fragment if the
6347 * comparison fails.
6348 */
6349 static void
6350 emit_alpha_test_instructions(struct svga_shader_emitter_v10 *emit,
6351 unsigned fs_color_tmp_index)
6352 {
6353 /* compare output color's alpha to alpha ref and kill */
6354 unsigned tmp = get_temp_index(emit);
6355 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
6356 struct tgsi_full_src_register tmp_src_x =
6357 scalar_src(&tmp_src, TGSI_SWIZZLE_X);
6358 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
6359 struct tgsi_full_src_register color_src =
6360 make_src_temp_reg(fs_color_tmp_index);
6361 struct tgsi_full_src_register color_src_w =
6362 scalar_src(&color_src, TGSI_SWIZZLE_W);
6363 struct tgsi_full_src_register ref_src =
6364 make_src_immediate_reg(emit->fs.alpha_ref_index);
6365 struct tgsi_full_dst_register color_dst =
6366 make_dst_output_reg(emit->fs.color_out_index[0]);
6367
6368 assert(emit->unit == PIPE_SHADER_FRAGMENT);
6369
6370 /* dst = src0 'alpha_func' src1 */
6371 emit_comparison(emit, emit->key.fs.alpha_func, &tmp_dst,
6372 &color_src_w, &ref_src);
6373
6374 /* DISCARD if dst.x == 0 */
6375 begin_emit_instruction(emit);
6376 emit_discard_opcode(emit, FALSE); /* discard if src0.x is zero */
6377 emit_src_register(emit, &tmp_src_x);
6378 end_emit_instruction(emit);
6379
6380 /* If we don't need to broadcast the color below or set fragments to
6381 * white, emit final color here.
6382 */
6383 if (emit->key.fs.write_color0_to_n_cbufs <= 1 &&
6384 !emit->key.fs.white_fragments) {
6385 /* MOV output.color, tempcolor */
6386 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst,
6387 &color_src, FALSE); /* XXX saturate? */
6388 }
6389
6390 free_temp_indexes(emit);
6391 }
6392
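/* Editor's note: a minimal sketch (illustrative only) of the alpha test
 * semantics emitted above: emit_comparison() writes an all-ones / all-zeros
 * mask into tmp.x and the conditional DISCARD (emitted with FALSE) kills the
 * fragment when that mask is zero, i.e. when the comparison fails.
 */
static inline boolean
alpha_test_keeps_fragment_sketch(boolean alpha_cmp_passed)
{
   const unsigned mask = alpha_cmp_passed ? ~0u : 0u;
   return mask != 0u;   /* DISCARD fires when the mask is zero */
}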
6393
6394 /**
6395 * When we need to emit white for all fragments (for emulating XOR logicop
6396 * mode), this function copies white into the temporary color output register.
6397 */
6398 static void
6399 emit_set_color_white(struct svga_shader_emitter_v10 *emit,
6400 unsigned fs_color_tmp_index)
6401 {
6402 struct tgsi_full_dst_register color_dst =
6403 make_dst_temp_reg(fs_color_tmp_index);
6404 struct tgsi_full_src_register white =
6405 make_immediate_reg_float(emit, 1.0f);
6406
6407 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, &white, FALSE);
6408 }
6409
6410
6411 /**
6412 * Emit instructions for writing a single color output to multiple
6413 * color buffers.
6414 * This is used when the TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS property
6415 * is set (or when key.fs.white_fragments is true) and the number of
6416 * render targets is greater than one.
6417 * \param fs_color_tmp_index index of the temp register that holds the
6418 * color to broadcast.
6419 */
6420 static void
6421 emit_broadcast_color_instructions(struct svga_shader_emitter_v10 *emit,
6422 unsigned fs_color_tmp_index)
6423 {
6424 const unsigned n = emit->key.fs.write_color0_to_n_cbufs;
6425 unsigned i;
6426 struct tgsi_full_src_register color_src =
6427 make_src_temp_reg(fs_color_tmp_index);
6428
6429 assert(emit->unit == PIPE_SHADER_FRAGMENT);
6430
6431 for (i = 0; i < n; i++) {
6432 unsigned output_reg = emit->fs.color_out_index[i];
6433 struct tgsi_full_dst_register color_dst =
6434 make_dst_output_reg(output_reg);
6435
6436 /* Fill in this semantic here since we'll use it later in
6437 * emit_dst_register().
6438 */
6439 emit->info.output_semantic_name[output_reg] = TGSI_SEMANTIC_COLOR;
6440
6441 /* MOV output.color[i], tempcolor */
6442 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst,
6443 &color_src, FALSE); /* XXX saturate? */
6444 }
6445 }
6446
6447
6448 /**
6449 * Emit extra helper code after the original shader code, but before the
6450 * last END/RET instruction.
6451 * For vertex shaders this means emitting the extra code to apply the
6452 * prescale scale/translation.
6453 */
6454 static boolean
6455 emit_post_helpers(struct svga_shader_emitter_v10 *emit)
6456 {
6457 if (emit->unit == PIPE_SHADER_VERTEX) {
6458 emit_vertex_instructions(emit);
6459 }
6460 else if (emit->unit == PIPE_SHADER_FRAGMENT) {
6461 const unsigned fs_color_tmp_index = emit->fs.color_tmp_index;
6462
6463 /* We no longer want emit_dst_register() to substitute the
6464 * temporary fragment color register for the real color output.
6465 */
6466 emit->fs.color_tmp_index = INVALID_INDEX;
6467
6468 if (emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS) {
6469 emit_alpha_test_instructions(emit, fs_color_tmp_index);
6470 }
6471 if (emit->key.fs.white_fragments) {
6472 emit_set_color_white(emit, fs_color_tmp_index);
6473 }
6474 if (emit->key.fs.write_color0_to_n_cbufs > 1 ||
6475 emit->key.fs.white_fragments) {
6476 emit_broadcast_color_instructions(emit, fs_color_tmp_index);
6477 }
6478 }
6479
6480 return TRUE;
6481 }
6482
6483
6484 /**
6485 * Translate the TGSI tokens into VGPU10 tokens.
6486 */
6487 static boolean
6488 emit_vgpu10_instructions(struct svga_shader_emitter_v10 *emit,
6489 const struct tgsi_token *tokens)
6490 {
6491 struct tgsi_parse_context parse;
6492 boolean ret = TRUE;
6493 boolean pre_helpers_emitted = FALSE;
6494 unsigned inst_number = 0;
6495
6496 tgsi_parse_init(&parse, tokens);
6497
6498 while (!tgsi_parse_end_of_tokens(&parse)) {
6499 tgsi_parse_token(&parse);
6500
6501 switch (parse.FullToken.Token.Type) {
6502 case TGSI_TOKEN_TYPE_IMMEDIATE:
6503 ret = emit_vgpu10_immediate(emit, &parse.FullToken.FullImmediate);
6504 if (!ret)
6505 goto done;
6506 break;
6507
6508 case TGSI_TOKEN_TYPE_DECLARATION:
6509 ret = emit_vgpu10_declaration(emit, &parse.FullToken.FullDeclaration);
6510 if (!ret)
6511 goto done;
6512 break;
6513
6514 case TGSI_TOKEN_TYPE_INSTRUCTION:
6515 if (!pre_helpers_emitted) {
6516 ret = emit_pre_helpers(emit);
6517 if (!ret)
6518 goto done;
6519 pre_helpers_emitted = TRUE;
6520 }
6521 ret = emit_vgpu10_instruction(emit, inst_number++,
6522 &parse.FullToken.FullInstruction);
6523 if (!ret)
6524 goto done;
6525 break;
6526
6527 case TGSI_TOKEN_TYPE_PROPERTY:
6528 ret = emit_vgpu10_property(emit, &parse.FullToken.FullProperty);
6529 if (!ret)
6530 goto done;
6531 break;
6532
6533 default:
6534 break;
6535 }
6536 }
6537
6538 done:
6539 tgsi_parse_free(&parse);
6540 return ret;
6541 }
6542
6543
6544 /**
6545 * Emit the first VGPU10 shader tokens.
6546 */
6547 static boolean
6548 emit_vgpu10_header(struct svga_shader_emitter_v10 *emit)
6549 {
6550 VGPU10ProgramToken ptoken;
6551
6552 /* First token: VGPU10ProgramToken (version info, program type (VS,GS,PS)) */
6553 ptoken.majorVersion = 4;
6554 ptoken.minorVersion = 0;
6555 ptoken.programType = translate_shader_type(emit->unit);
6556 if (!emit_dword(emit, ptoken.value))
6557 return FALSE;
6558
6559 /* Second token: total length of shader, in tokens. We can't fill this
6560 * in until we're all done. Emit zero for now.
6561 */
6562 return emit_dword(emit, 0);
6563 }
6564
6565
6566 static boolean
6567 emit_vgpu10_tail(struct svga_shader_emitter_v10 *emit)
6568 {
6569 VGPU10ProgramToken *tokens;
6570
6571 /* Replace the second token with total shader length */
6572 tokens = (VGPU10ProgramToken *) emit->buf;
6573 tokens[1].value = emit_get_num_tokens(emit);
6574
6575 return TRUE;
6576 }
6577
6578
6579 /**
6580 * Modify the FS to read the BCOLORs and use the FACE register
6581 * to choose between the front/back colors.
6582 */
6583 static const struct tgsi_token *
6584 transform_fs_twoside(const struct tgsi_token *tokens)
6585 {
6586 if (0) {
6587 debug_printf("Before tgsi_add_two_side ------------------\n");
6588 tgsi_dump(tokens,0);
6589 }
6590 tokens = tgsi_add_two_side(tokens);
6591 if (0) {
6592 debug_printf("After tgsi_add_two_side ------------------\n");
6593 tgsi_dump(tokens, 0);
6594 }
6595 return tokens;
6596 }
6597
6598
6599 /**
6600 * Modify the FS to do polygon stipple.
6601 */
6602 static const struct tgsi_token *
6603 transform_fs_pstipple(struct svga_shader_emitter_v10 *emit,
6604 const struct tgsi_token *tokens)
6605 {
6606 const struct tgsi_token *new_tokens;
6607 unsigned unit;
6608
6609 if (0) {
6610 debug_printf("Before pstipple ------------------\n");
6611 tgsi_dump(tokens,0);
6612 }
6613
6614 new_tokens = util_pstipple_create_fragment_shader(tokens, &unit, 0,
6615 TGSI_FILE_INPUT);
6616
6617 emit->fs.pstipple_sampler_unit = unit;
6618
6619 /* Setup texture state for stipple */
6620 emit->key.tex[unit].texture_target = PIPE_TEXTURE_2D;
6621 emit->key.tex[unit].swizzle_r = TGSI_SWIZZLE_X;
6622 emit->key.tex[unit].swizzle_g = TGSI_SWIZZLE_Y;
6623 emit->key.tex[unit].swizzle_b = TGSI_SWIZZLE_Z;
6624 emit->key.tex[unit].swizzle_a = TGSI_SWIZZLE_W;
6625
6626 if (0) {
6627 debug_printf("After pstipple ------------------\n");
6628 tgsi_dump(new_tokens, 0);
6629 }
6630
6631 return new_tokens;
6632 }
6633
6634 /**
6635 * Modify the FS to support anti-aliased points.
6636 */
6637 static const struct tgsi_token *
6638 transform_fs_aapoint(const struct tgsi_token *tokens,
6639 int aa_coord_index)
6640 {
6641 if (0) {
6642 debug_printf("Before tgsi_add_aa_point ------------------\n");
6643 tgsi_dump(tokens,0);
6644 }
6645 tokens = tgsi_add_aa_point(tokens, aa_coord_index);
6646 if (0) {
6647 debug_printf("After tgsi_add_aa_point ------------------\n");
6648 tgsi_dump(tokens, 0);
6649 }
6650 return tokens;
6651 }
6652
6653 /**
6654 * This is the main entrypoint for the TGSI -> VPGU10 translator.
6655 */
6656 struct svga_shader_variant *
6657 svga_tgsi_vgpu10_translate(struct svga_context *svga,
6658 const struct svga_shader *shader,
6659 const struct svga_compile_key *key,
6660 unsigned unit)
6661 {
6662 struct svga_shader_variant *variant = NULL;
6663 struct svga_shader_emitter_v10 *emit;
6664 const struct tgsi_token *tokens = shader->tokens;
6665 struct svga_vertex_shader *vs = svga->curr.vs;
6666 struct svga_geometry_shader *gs = svga->curr.gs;
6667
6668 assert(unit == PIPE_SHADER_VERTEX ||
6669 unit == PIPE_SHADER_GEOMETRY ||
6670 unit == PIPE_SHADER_FRAGMENT);
6671
6672 /* These two flags cannot be used together */
6673 assert(key->vs.need_prescale + key->vs.undo_viewport <= 1);
6674
6675 /*
6676 * Setup the code emitter
6677 */
6678 emit = alloc_emitter();
6679 if (!emit)
6680 return NULL;
6681
6682 emit->unit = unit;
6683 emit->key = *key;
6684
6685 emit->vposition.need_prescale = (emit->key.vs.need_prescale ||
6686 emit->key.gs.need_prescale);
6687 emit->vposition.tmp_index = INVALID_INDEX;
6688 emit->vposition.so_index = INVALID_INDEX;
6689 emit->vposition.out_index = INVALID_INDEX;
6690
6691 emit->fs.color_tmp_index = INVALID_INDEX;
6692 emit->fs.face_input_index = INVALID_INDEX;
6693 emit->fs.fragcoord_input_index = INVALID_INDEX;
6694
6695 emit->gs.prim_id_index = INVALID_INDEX;
6696
6697 emit->clip_dist_out_index = INVALID_INDEX;
6698 emit->clip_dist_tmp_index = INVALID_INDEX;
6699 emit->clip_dist_so_index = INVALID_INDEX;
6700 emit->clip_vertex_out_index = INVALID_INDEX;
6701
6702 if (emit->key.fs.alpha_func == SVGA3D_CMP_INVALID) {
6703 emit->key.fs.alpha_func = SVGA3D_CMP_ALWAYS;
6704 }
6705
6706 if (unit == PIPE_SHADER_FRAGMENT) {
6707 if (key->fs.light_twoside) {
6708 tokens = transform_fs_twoside(tokens);
6709 }
6710 if (key->fs.pstipple) {
6711 const struct tgsi_token *new_tokens =
6712 transform_fs_pstipple(emit, tokens);
6713 if (tokens != shader->tokens) {
6714 /* free the two-sided shader tokens */
6715 tgsi_free_tokens(tokens);
6716 }
6717 tokens = new_tokens;
6718 }
6719 if (key->fs.aa_point) {
6720 tokens = transform_fs_aapoint(tokens, key->fs.aa_point_coord_index);
6721 }
6722 }
6723
6724 if (SVGA_DEBUG & DEBUG_TGSI) {
6725 debug_printf("#####################################\n");
6726 debug_printf("### TGSI Shader %u\n", shader->id);
6727 tgsi_dump(tokens, 0);
6728 }
6729
6730 /**
6731 * Re-scan the shader info if the token string is different from the one
6732 * included in the shader; otherwise, the scanned info is already up-to-date.
6733 */
6734 if (tokens != shader->tokens) {
6735 tgsi_scan_shader(tokens, &emit->info);
6736 } else {
6737 emit->info = shader->info;
6738 }
6739
6740 emit->num_outputs = emit->info.num_outputs;
6741
6742 if (unit == PIPE_SHADER_FRAGMENT) {
6743 /* Compute FS input remapping to match the output from VS/GS */
6744 if (gs) {
6745 svga_link_shaders(&gs->base.info, &emit->info, &emit->linkage);
6746 } else {
6747 assert(vs);
6748 svga_link_shaders(&vs->base.info, &emit->info, &emit->linkage);
6749 }
6750 } else if (unit == PIPE_SHADER_GEOMETRY) {
6751 assert(vs);
6752 svga_link_shaders(&vs->base.info, &emit->info, &emit->linkage);
6753 }
6754
6755 determine_clipping_mode(emit);
6756
6757 if (unit == PIPE_SHADER_GEOMETRY || unit == PIPE_SHADER_VERTEX) {
6758 if (shader->stream_output != NULL || emit->clip_mode == CLIP_DISTANCE) {
6759 /* if there are stream output declarations associated
6760 * with this shader or the shader writes to ClipDistance
6761 * then reserve extra registers for the non-adjusted vertex position
6762 * and the ClipDistance shadow copy
6763 */
6764 emit->vposition.so_index = emit->num_outputs++;
6765
6766 if (emit->clip_mode == CLIP_DISTANCE) {
6767 emit->clip_dist_so_index = emit->num_outputs++;
6768 if (emit->info.num_written_clipdistance > 4)
6769 emit->num_outputs++;
6770 }
6771 }
6772 }
6773
6774 /*
6775 * Do actual shader translation.
6776 */
6777 if (!emit_vgpu10_header(emit)) {
6778 debug_printf("svga: emit VGPU10 header failed\n");
6779 goto cleanup;
6780 }
6781
6782 if (!emit_vgpu10_instructions(emit, tokens)) {
6783 debug_printf("svga: emit VGPU10 instructions failed\n");
6784 goto cleanup;
6785 }
6786
6787 if (!emit_vgpu10_tail(emit)) {
6788 debug_printf("svga: emit VGPU10 tail failed\n");
6789 goto cleanup;
6790 }
6791
6792 if (emit->register_overflow) {
6793 goto cleanup;
6794 }
6795
6796 /*
6797 * Create, initialize the 'variant' object.
6798 */
6799 variant = svga_new_shader_variant(svga);
6800 if (!variant)
6801 goto cleanup;
6802
6803 variant->shader = shader;
6804 variant->nr_tokens = emit_get_num_tokens(emit);
6805 variant->tokens = (const unsigned *)emit->buf;
6806 emit->buf = NULL; /* buffer is no longer owned by the emitter context */
6807 memcpy(&variant->key, key, sizeof(*key));
6808 variant->id = UTIL_BITMASK_INVALID_INDEX;
6809
6810 /* The extra constant starting offset starts with the number of
6811 * shader constants declared in the shader.
6812 */
6813 variant->extra_const_start = emit->num_shader_consts[0];
6814 if (key->gs.wide_point) {
6815 /**
6816 * The extra constant added in the transformed shader
6817 * for inverse viewport scale is to be supplied by the driver.
6818 * So the extra constant starting offset needs to be reduced by 1.
6819 */
6820 assert(variant->extra_const_start > 0);
6821 variant->extra_const_start--;
6822 }
6823
6824 variant->pstipple_sampler_unit = emit->fs.pstipple_sampler_unit;
6825
6826 /* If there was exactly one write to a fragment shader output register
6827 * and it came from a constant buffer, we know all fragments will have
6828 * the same color (except for blending).
6829 */
6830 variant->constant_color_output =
6831 emit->constant_color_output && emit->num_output_writes == 1;
6832
6833 /** Keep track in the variant of whether flat interpolation is used
6834 * for any of the varyings.
6835 */
6836 variant->uses_flat_interp = emit->uses_flat_interp;
6837
6838 if (tokens != shader->tokens) {
6839 tgsi_free_tokens(tokens);
6840 }
6841
6842 cleanup:
6843 free_emitter(emit);
6844
6845 return variant;
6846 }