1 /**********************************************************
2 * Copyright 1998-2013 VMware, Inc. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person
5 * obtaining a copy of this software and associated documentation
6 * files (the "Software"), to deal in the Software without
7 * restriction, including without limitation the rights to use, copy,
8 * modify, merge, publish, distribute, sublicense, and/or sell copies
9 * of the Software, and to permit persons to whom the Software is
10 * furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be
13 * included in all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 *
24 **********************************************************/
25
26 /**
27 * @file svga_tgsi_vgpu10.c
28 *
29 * TGSI -> VGPU10 shader translation.
30 *
31 * \author Mingcheng Chen
32 * \author Brian Paul
33 */
34
35 #include "pipe/p_compiler.h"
36 #include "pipe/p_shader_tokens.h"
37 #include "pipe/p_defines.h"
38 #include "tgsi/tgsi_build.h"
39 #include "tgsi/tgsi_dump.h"
40 #include "tgsi/tgsi_info.h"
41 #include "tgsi/tgsi_parse.h"
42 #include "tgsi/tgsi_scan.h"
43 #include "tgsi/tgsi_strings.h"
44 #include "tgsi/tgsi_two_side.h"
45 #include "tgsi/tgsi_aa_point.h"
46 #include "tgsi/tgsi_util.h"
47 #include "util/u_math.h"
48 #include "util/u_memory.h"
49 #include "util/u_bitmask.h"
50 #include "util/u_debug.h"
51 #include "util/u_pstipple.h"
52
53 #include "svga_context.h"
54 #include "svga_debug.h"
55 #include "svga_link.h"
56 #include "svga_shader.h"
57 #include "svga_tgsi.h"
58
59 #include "VGPU10ShaderTokens.h"
60
61
62 #define INVALID_INDEX 99999
63 #define MAX_INTERNAL_TEMPS 3
64 #define MAX_SYSTEM_VALUES 4
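/* Each immediate is stored as a 4-component vector (see the immediates[]
 * array below), hence the divide by 4.
 */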
65 #define MAX_IMMEDIATE_COUNT \
66 (VGPU10_MAX_IMMEDIATE_CONSTANT_BUFFER_ELEMENT_COUNT/4)
67 #define MAX_TEMP_ARRAYS 64 /* Enough? */
68
69
70 /**
71 * Clipping is complicated. There are four different cases that we
72 * handle during VS/GS shader translation:
73 */
74 enum clipping_mode
75 {
76 CLIP_NONE, /**< No clipping enabled */
77 CLIP_LEGACY, /**< The shader has no clipping declarations or code but
78 * one or more user-defined clip planes are enabled. We
79 * generate extra code to emit clip distances.
80 */
81 CLIP_DISTANCE, /**< The shader already declares clip distance output
82 * registers and has code to write to them.
83 */
84 CLIP_VERTEX /**< The shader declares a clip vertex output register and
85 * has code that writes to the register. We convert the
86 * clipvertex position into one or more clip distances.
87 */
88 };
89
90
91 /* Shader signature info */
92 struct svga_shader_signature
93 {
94 SVGA3dDXShaderSignatureHeader header;
95 SVGA3dDXShaderSignatureEntry inputs[PIPE_MAX_SHADER_INPUTS];
96 SVGA3dDXShaderSignatureEntry outputs[PIPE_MAX_SHADER_OUTPUTS];
97 SVGA3dDXShaderSignatureEntry patchConstants[PIPE_MAX_SHADER_OUTPUTS];
98 };
99
100 static inline void
101 set_shader_signature_entry(SVGA3dDXShaderSignatureEntry *e,
102 unsigned index,
103 SVGA3dDXSignatureSemanticName sgnName,
104 unsigned mask,
105 SVGA3dDXSignatureRegisterComponentType compType,
106 SVGA3dDXSignatureMinPrecision minPrecision)
107 {
108 e->registerIndex = index;
109 e->semanticName = sgnName;
110 e->mask = mask;
111 e->componentType = compType;
112 e->minPrecision = minPrecision;
113 }
114
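/* This table is indexed by TGSI_SEMANTIC_x values, in enum order. TGSI
 * semantics with no VGPU10 counterpart map to
 * SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED.
 */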
115 static const SVGA3dDXSignatureSemanticName
116 tgsi_semantic_to_sgn_name[TGSI_SEMANTIC_COUNT] = {
117 SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION,
118 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
119 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
120 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
121 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
122 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
123 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
124 SVGADX_SIGNATURE_SEMANTIC_NAME_IS_FRONT_FACE,
125 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
126 SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID,
127 SVGADX_SIGNATURE_SEMANTIC_NAME_INSTANCE_ID,
128 SVGADX_SIGNATURE_SEMANTIC_NAME_VERTEX_ID,
129 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
130 SVGADX_SIGNATURE_SEMANTIC_NAME_CLIP_DISTANCE,
131 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
132 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
133 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
134 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
135 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
136 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
137 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
138 SVGADX_SIGNATURE_SEMANTIC_NAME_VIEWPORT_ARRAY_INDEX,
139 SVGADX_SIGNATURE_SEMANTIC_NAME_RENDER_TARGET_ARRAY_INDEX,
140 SVGADX_SIGNATURE_SEMANTIC_NAME_SAMPLE_INDEX,
141 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
142 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
143 SVGADX_SIGNATURE_SEMANTIC_NAME_INSTANCE_ID,
144 SVGADX_SIGNATURE_SEMANTIC_NAME_VERTEX_ID,
145 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
146 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
147 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
148 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
149 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
150 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
151 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
152 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
153 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
154 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
155 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
156 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
157 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
158 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
159 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
160 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
161 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED
162 };
163
164
165 /**
166 * Map tgsi semantic name to SVGA signature semantic name
167 */
168 static inline SVGA3dDXSignatureSemanticName
169 map_tgsi_semantic_to_sgn_name(enum tgsi_semantic name)
170 {
171 assert(name < TGSI_SEMANTIC_COUNT);
172
173 /* Do a few asserts here to spot check the mapping */
174 assert(tgsi_semantic_to_sgn_name[TGSI_SEMANTIC_PRIMID] ==
175 SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID);
176 assert(tgsi_semantic_to_sgn_name[TGSI_SEMANTIC_VIEWPORT_INDEX] ==
177 SVGADX_SIGNATURE_SEMANTIC_NAME_VIEWPORT_ARRAY_INDEX);
178 assert(tgsi_semantic_to_sgn_name[TGSI_SEMANTIC_INVOCATIONID] ==
179 SVGADX_SIGNATURE_SEMANTIC_NAME_INSTANCE_ID);
180
181 return tgsi_semantic_to_sgn_name[name];
182 }
183
184
185 struct svga_shader_emitter_v10
186 {
187 /* The token output buffer */
188 unsigned size;
189 char *buf;
190 char *ptr;
191
192 /* Information about the shader and state (does not change) */
193 struct svga_compile_key key;
194 struct tgsi_shader_info info;
195 unsigned unit;
196 unsigned version; /**< Either 40 or 41 at this time */
197
198 unsigned cur_tgsi_token; /**< current tgsi token position */
199 unsigned inst_start_token;
200 boolean discard_instruction; /**< throw away current instruction? */
201 boolean reemit_instruction; /**< reemit current instruction */
202 boolean skip_instruction; /**< skip current instruction */
203
204 union tgsi_immediate_data immediates[MAX_IMMEDIATE_COUNT][4];
205 double (*immediates_dbl)[2];
206 unsigned num_immediates; /**< Number of immediates emitted */
207 unsigned common_immediate_pos[10]; /**< literals for common immediates */
208 unsigned num_common_immediates;
209 boolean immediates_emitted;
210
211 unsigned num_outputs; /**< includes any extra outputs */
212 /** The first extra output is reserved for
213 * the non-adjusted vertex position, for
214 * stream output purposes
215 */
216
217 /* Temporary Registers */
218 unsigned num_shader_temps; /**< num of temps used by original shader */
219 unsigned internal_temp_count; /**< currently allocated internal temps */
220 struct {
221 unsigned start, size;
222 } temp_arrays[MAX_TEMP_ARRAYS];
223 unsigned num_temp_arrays;
224
225 /** Map TGSI temp registers to VGPU10 temp array IDs and indexes */
226 struct {
227 unsigned arrayId, index;
228 boolean initialized;
229 } temp_map[VGPU10_MAX_TEMPS]; /**< arrayId, element */
230
231 unsigned initialize_temp_index;
232
233 /** Number of constants used by original shader for each constant buffer.
234 * The size should probably always match that of svga_state.constbufs.
235 */
236 unsigned num_shader_consts[SVGA_MAX_CONST_BUFS];
237
238 /* Samplers */
239 unsigned num_samplers;
240 boolean sampler_view[PIPE_MAX_SAMPLERS]; /**< True if sampler view exists */
241 ubyte sampler_target[PIPE_MAX_SAMPLERS]; /**< TGSI_TEXTURE_x */
242 ubyte sampler_return_type[PIPE_MAX_SAMPLERS]; /**< TGSI_RETURN_TYPE_x */
243
244 /* Index Range declaration */
245 struct {
246 unsigned start_index;
247 unsigned count;
248 boolean required;
249 unsigned operandType;
250 unsigned size;
251 unsigned dim;
252 } index_range;
253
254 /* Address regs (really implemented with temps) */
255 unsigned num_address_regs;
256 unsigned address_reg_index[MAX_VGPU10_ADDR_REGS];
257
258 /* Output register usage masks */
259 ubyte output_usage_mask[PIPE_MAX_SHADER_OUTPUTS];
260
261 /* To map TGSI system value index to VGPU shader input indexes */
262 ubyte system_value_indexes[MAX_SYSTEM_VALUES];
263
264 struct {
265 /* vertex position scale/translation */
266 unsigned out_index; /**< the real position output reg */
267 unsigned tmp_index; /**< the fake/temp position output reg */
268 unsigned so_index; /**< the non-adjusted position output reg */
269 unsigned prescale_cbuf_index; /* index to the const buf for prescale */
270 unsigned prescale_scale_index, prescale_trans_index;
271 unsigned num_prescale; /* number of prescale factors in the const buf */
272 unsigned viewport_index;
273 unsigned need_prescale:1;
274 unsigned have_prescale:1;
275 } vposition;
276
277 /* For vertex shaders only */
278 struct {
279 /* viewport constant */
280 unsigned viewport_index;
281
282 unsigned vertex_id_bias_index;
283 unsigned vertex_id_sys_index;
284 unsigned vertex_id_tmp_index;
285
286 /* temp index of adjusted vertex attributes */
287 unsigned adjusted_input[PIPE_MAX_SHADER_INPUTS];
288 } vs;
289
290 /* For fragment shaders only */
291 struct {
292 unsigned color_out_index[PIPE_MAX_COLOR_BUFS]; /**< the real color output regs */
293 unsigned num_color_outputs;
294 unsigned color_tmp_index; /**< fake/temp color output reg */
295 unsigned alpha_ref_index; /**< immediate constant for alpha ref */
296
297 /* front-face */
298 unsigned face_input_index; /**< real fragment shader face reg (bool) */
299 unsigned face_tmp_index; /**< temp face reg converted to -1 / +1 */
300
301 unsigned pstipple_sampler_unit;
302
303 unsigned fragcoord_input_index; /**< real fragment position input reg */
304 unsigned fragcoord_tmp_index; /**< 1/w modified position temp reg */
305
306 unsigned sample_id_sys_index; /**< TGSI index of sample id sys value */
307
308 unsigned sample_pos_sys_index; /**< TGSI index of sample pos sys value */
309 unsigned sample_pos_tmp_index; /**< which temp reg has the sample pos */
310
311 /** TGSI index of sample mask input sys value */
312 unsigned sample_mask_in_sys_index;
313
314 /** Which texture units are doing shadow comparison in the FS code */
315 unsigned shadow_compare_units;
316
317 /* layer */
318 unsigned layer_input_index; /**< TGSI index of layer */
319 unsigned layer_imm_index; /**< immediate for default layer 0 */
320 } fs;
321
322 /* For geometry shaders only */
323 struct {
324 VGPU10_PRIMITIVE prim_type;/**< VGPU10 primitive type */
325 VGPU10_PRIMITIVE_TOPOLOGY prim_topology; /**< VGPU10 primitive topology */
326 unsigned input_size; /**< size of input arrays */
327 unsigned prim_id_index; /**< primitive id register index */
328 unsigned max_out_vertices; /**< maximum number of output vertices */
329 unsigned invocations;
330 unsigned invocation_id_sys_index;
331
332 unsigned viewport_index_out_index;
333 unsigned viewport_index_tmp_index;
334 } gs;
335
336 /* For tessellation control shaders only */
337 struct {
338 unsigned vertices_per_patch_index; /**< vertices_per_patch system value index */
339 unsigned imm_index; /**< immediate for tcs */
340 unsigned invocation_id_sys_index; /**< invocation id */
341 unsigned invocation_id_tmp_index;
342 unsigned instruction_token_pos; /* token pos for the first instruction */
343 unsigned control_point_input_index; /* control point input register index */
344 unsigned control_point_addr_index; /* control point input address register */
345 unsigned control_point_out_index; /* control point output register index */
346 unsigned control_point_tmp_index; /* control point temporary register */
347 unsigned control_point_out_count; /* control point output count */
348 boolean control_point_phase; /* true if in control point phase */
349 boolean fork_phase_add_signature; /* true if needs to add signature in fork phase */
350 unsigned patch_generic_out_count; /* per-patch generic output count */
351 unsigned patch_generic_out_index; /* per-patch generic output register index */
352 unsigned patch_generic_tmp_index; /* per-patch generic temporary register index */
353 unsigned prim_id_index; /* primitive id */
354 struct {
355 unsigned out_index; /* real tessinner output register */
356 unsigned temp_index; /* tessinner temp register */
357 unsigned tgsi_index; /* tgsi tessinner output register */
358 } inner;
359 struct {
360 unsigned out_index; /* real tessouter output register */
361 unsigned temp_index; /* tessouter temp register */
362 unsigned tgsi_index; /* tgsi tessouter output register */
363 } outer;
364 } tcs;
365
366 /* For tessellation evaluation shaders only */
367 struct {
368 enum pipe_prim_type prim_mode;
369 enum pipe_tess_spacing spacing;
370 boolean vertices_order_cw;
371 boolean point_mode;
372 unsigned tesscoord_sys_index;
373 unsigned prim_id_index; /* primitive id */
374 struct {
375 unsigned in_index; /* real tessinner input register */
376 unsigned temp_index; /* tessinner temp register */
377 unsigned tgsi_index; /* tgsi tessinner input register */
378 } inner;
379 struct {
380 unsigned in_index; /* real tessouter input register */
381 unsigned temp_index; /* tessouter temp register */
382 unsigned tgsi_index; /* tgsi tessouter input register */
383 } outer;
384 } tes;
385
386 /* For vertex or geometry shaders */
387 enum clipping_mode clip_mode;
388 unsigned clip_dist_out_index; /**< clip distance output register index */
389 unsigned clip_dist_tmp_index; /**< clip distance temporary register */
390 unsigned clip_dist_so_index; /**< clip distance shadow copy */
391
392 /** Index of temporary holding the clipvertex coordinate */
393 unsigned clip_vertex_out_index; /**< clip vertex output register index */
394 unsigned clip_vertex_tmp_index; /**< clip vertex temporary index */
395
396 /* user clip plane constant slot indexes */
397 unsigned clip_plane_const[PIPE_MAX_CLIP_PLANES];
398
399 unsigned num_output_writes;
400 boolean constant_color_output;
401
402 boolean uses_flat_interp;
403
404 unsigned reserved_token; /* index to the reserved token */
405 boolean uses_precise_qualifier;
406
407 /* For all shaders: const reg index for RECT coord scaling */
408 unsigned texcoord_scale_index[PIPE_MAX_SAMPLERS];
409
410 /* For all shaders: const reg index for texture buffer size */
411 unsigned texture_buffer_size_index[PIPE_MAX_SAMPLERS];
412
413 /* VS/TCS/TES/GS/FS Linkage info */
414 struct shader_linkage linkage;
415 struct tgsi_shader_info *prevShaderInfo;
416
417 /* Shader signature */
418 struct svga_shader_signature signature;
419
420 bool register_overflow; /**< Set if we exceed a VGPU10 register limit */
421
422 /* For pipe_debug_message */
423 struct pipe_debug_callback svga_debug_callback;
424
425 /* current loop depth in shader */
426 unsigned current_loop_depth;
427 };
428
429
430 static void emit_tcs_input_declarations(struct svga_shader_emitter_v10 *emit);
431 static void emit_tcs_output_declarations(struct svga_shader_emitter_v10 *emit);
432 static boolean emit_temporaries_declaration(struct svga_shader_emitter_v10 *emit);
433 static boolean emit_constant_declaration(struct svga_shader_emitter_v10 *emit);
434 static boolean emit_sampler_declarations(struct svga_shader_emitter_v10 *emit);
435 static boolean emit_resource_declarations(struct svga_shader_emitter_v10 *emit);
436 static boolean emit_vgpu10_immediates_block(struct svga_shader_emitter_v10 *emit);
437 static boolean emit_index_range_declaration(struct svga_shader_emitter_v10 *emit);
438 static void emit_temp_prescale_instructions(struct svga_shader_emitter_v10 *emit);
439
440 static boolean
441 emit_post_helpers(struct svga_shader_emitter_v10 *emit);
442
443 static boolean
444 emit_vertex(struct svga_shader_emitter_v10 *emit,
445 const struct tgsi_full_instruction *inst);
446
447 static boolean
448 emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit,
449 unsigned inst_number,
450 const struct tgsi_full_instruction *inst);
451
452 static void
453 emit_input_declaration(struct svga_shader_emitter_v10 *emit,
454 unsigned opcodeType, unsigned operandType,
455 unsigned dim, unsigned index, unsigned size,
456 unsigned name, unsigned numComp,
457 unsigned selMode, unsigned usageMask,
458 unsigned interpMode,
459 boolean addSignature,
460 SVGA3dDXSignatureSemanticName sgnName);
461
462 static void
463 create_temp_array(struct svga_shader_emitter_v10 *emit,
464 unsigned arrayID, unsigned first, unsigned count,
465 unsigned startIndex);
466
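/* Scratch buffer that expand() falls back to when reallocation fails, so
 * that subsequent emit calls still have a valid destination while the
 * failure propagates up as FALSE.
 */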
467 static char err_buf[128];
468
469 static boolean
470 expand(struct svga_shader_emitter_v10 *emit)
471 {
472 char *new_buf;
473 unsigned newsize = emit->size * 2;
474
475 if (emit->buf != err_buf)
476 new_buf = REALLOC(emit->buf, emit->size, newsize);
477 else
478 new_buf = NULL;
479
480 if (!new_buf) {
481 emit->ptr = err_buf;
482 emit->buf = err_buf;
483 emit->size = sizeof(err_buf);
484 return FALSE;
485 }
486
487 emit->size = newsize;
488 emit->ptr = new_buf + (emit->ptr - emit->buf);
489 emit->buf = new_buf;
490 return TRUE;
491 }
492
493 /**
494 * Create and initialize a new svga_shader_emitter_v10 object.
495 */
496 static struct svga_shader_emitter_v10 *
497 alloc_emitter(void)
498 {
499 struct svga_shader_emitter_v10 *emit = CALLOC(1, sizeof(*emit));
500
501 if (!emit)
502 return NULL;
503
504 /* to initialize the output buffer */
505 emit->size = 512;
506 if (!expand(emit)) {
507 FREE(emit);
508 return NULL;
509 }
510 return emit;
511 }
512
513 /**
514 * Free an svga_shader_emitter_v10 object.
515 */
516 static void
517 free_emitter(struct svga_shader_emitter_v10 *emit)
518 {
519 assert(emit);
520 FREE(emit->buf); /* will be NULL if translation succeeded */
521 FREE(emit);
522 }
523
524 static inline boolean
525 reserve(struct svga_shader_emitter_v10 *emit,
526 unsigned nr_dwords)
527 {
528 while (emit->ptr - emit->buf + nr_dwords * sizeof(uint32) >= emit->size) {
529 if (!expand(emit))
530 return FALSE;
531 }
532
533 return TRUE;
534 }
535
536 static boolean
537 emit_dword(struct svga_shader_emitter_v10 *emit, uint32 dword)
538 {
539 if (!reserve(emit, 1))
540 return FALSE;
541
542 *(uint32 *)emit->ptr = dword;
543 emit->ptr += sizeof dword;
544 return TRUE;
545 }
546
547 static boolean
548 emit_dwords(struct svga_shader_emitter_v10 *emit,
549 const uint32 *dwords,
550 unsigned nr)
551 {
552 if (!reserve(emit, nr))
553 return FALSE;
554
555 memcpy(emit->ptr, dwords, nr * sizeof *dwords);
556 emit->ptr += nr * sizeof *dwords;
557 return TRUE;
558 }
559
560 /** Return the number of tokens in the emitter's buffer */
561 static unsigned
562 emit_get_num_tokens(const struct svga_shader_emitter_v10 *emit)
563 {
564 return (emit->ptr - emit->buf) / sizeof(unsigned);
565 }
566
567
568 /**
569 * Check for register overflow. If we overflow we'll set an
570 * error flag. This function can be called for register declarations
571 * or for registers used as src/dst instruction operands.
572 * \param operandType register type. One of VGPU10_OPERAND_TYPE_x
573 * or VGPU10_OPCODE_DCL_x
574 * \param index the register index
575 */
576 static void
577 check_register_index(struct svga_shader_emitter_v10 *emit,
578 unsigned operandType, unsigned index)
579 {
580 bool overflow_before = emit->register_overflow;
581
582 switch (operandType) {
583 case VGPU10_OPERAND_TYPE_TEMP:
584 case VGPU10_OPERAND_TYPE_INDEXABLE_TEMP:
585 case VGPU10_OPCODE_DCL_TEMPS:
586 if (index >= VGPU10_MAX_TEMPS) {
587 emit->register_overflow = TRUE;
588 }
589 break;
590 case VGPU10_OPERAND_TYPE_CONSTANT_BUFFER:
591 case VGPU10_OPCODE_DCL_CONSTANT_BUFFER:
592 if (index >= VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT) {
593 emit->register_overflow = TRUE;
594 }
595 break;
596 case VGPU10_OPERAND_TYPE_INPUT:
597 case VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID:
598 case VGPU10_OPCODE_DCL_INPUT:
599 case VGPU10_OPCODE_DCL_INPUT_SGV:
600 case VGPU10_OPCODE_DCL_INPUT_SIV:
601 case VGPU10_OPCODE_DCL_INPUT_PS:
602 case VGPU10_OPCODE_DCL_INPUT_PS_SGV:
603 case VGPU10_OPCODE_DCL_INPUT_PS_SIV:
604 if ((emit->unit == PIPE_SHADER_VERTEX &&
605 index >= VGPU10_MAX_VS_INPUTS) ||
606 (emit->unit == PIPE_SHADER_GEOMETRY &&
607 index >= VGPU10_MAX_GS_INPUTS) ||
608 (emit->unit == PIPE_SHADER_FRAGMENT &&
609 index >= VGPU10_MAX_FS_INPUTS) ||
610 (emit->unit == PIPE_SHADER_TESS_CTRL &&
611 index >= VGPU11_MAX_HS_INPUT_CONTROL_POINTS) ||
612 (emit->unit == PIPE_SHADER_TESS_EVAL &&
613 index >= VGPU11_MAX_DS_INPUT_CONTROL_POINTS)) {
614 emit->register_overflow = TRUE;
615 }
616 break;
617 case VGPU10_OPERAND_TYPE_OUTPUT:
618 case VGPU10_OPCODE_DCL_OUTPUT:
619 case VGPU10_OPCODE_DCL_OUTPUT_SGV:
620 case VGPU10_OPCODE_DCL_OUTPUT_SIV:
621 /* Note: in the tcs we skip two output indices for the
622 * tessinner/outer levels. The implementation will not exceed
623 * the declared output count, but it allows the index to go beyond
624 * VGPU11_MAX_HS_OUTPUTS.
625 * The index will never be >= VGPU11_MAX_HS_OUTPUTS + 2.
626 */
627 if ((emit->unit == PIPE_SHADER_VERTEX &&
628 index >= VGPU10_MAX_VS_OUTPUTS) ||
629 (emit->unit == PIPE_SHADER_GEOMETRY &&
630 index >= VGPU10_MAX_GS_OUTPUTS) ||
631 (emit->unit == PIPE_SHADER_FRAGMENT &&
632 index >= VGPU10_MAX_FS_OUTPUTS) ||
633 (emit->unit == PIPE_SHADER_TESS_CTRL &&
634 index >= VGPU11_MAX_HS_OUTPUTS + 2) ||
635 (emit->unit == PIPE_SHADER_TESS_EVAL &&
636 index >= VGPU11_MAX_DS_OUTPUTS)) {
637 emit->register_overflow = TRUE;
638 }
639 break;
640 case VGPU10_OPERAND_TYPE_SAMPLER:
641 case VGPU10_OPCODE_DCL_SAMPLER:
642 if (index >= VGPU10_MAX_SAMPLERS) {
643 emit->register_overflow = TRUE;
644 }
645 break;
646 case VGPU10_OPERAND_TYPE_RESOURCE:
647 case VGPU10_OPCODE_DCL_RESOURCE:
648 if (index >= VGPU10_MAX_RESOURCES) {
649 emit->register_overflow = TRUE;
650 }
651 break;
652 case VGPU10_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER:
653 if (index >= MAX_IMMEDIATE_COUNT) {
654 emit->register_overflow = TRUE;
655 }
656 break;
657 case VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK:
658 /* nothing */
659 break;
660 default:
661 assert(0);
662 ; /* nothing */
663 }
664
665 if (emit->register_overflow && !overflow_before) {
666 debug_printf("svga: vgpu10 register overflow (reg %u, index %u)\n",
667 operandType, index);
668 }
669 }
670
671
672 /**
673 * Examine misc state to determine the clipping mode.
674 */
675 static void
676 determine_clipping_mode(struct svga_shader_emitter_v10 *emit)
677 {
678 /* num_written_clipdistance in the shader info for a tessellation
679 * control shader is always 0 because TGSI_PROPERTY_NUM_CLIPDIST_ENABLED
680 * is not defined for that shader stage. So we go through all the output
681 * declarations to set num_written_clipdistance. This is just to determine
682 * the clipping mode.
683 */
684 if (emit->unit == PIPE_SHADER_TESS_CTRL) {
685 unsigned i;
686 for (i = 0; i < emit->info.num_outputs; i++) {
687 if (emit->info.output_semantic_name[i] == TGSI_SEMANTIC_CLIPDIST) {
688 emit->info.num_written_clipdistance =
689 4 * (emit->info.output_semantic_index[i] + 1);
690 }
691 }
692 }
693
694 if (emit->info.num_written_clipdistance > 0) {
695 emit->clip_mode = CLIP_DISTANCE;
696 }
697 else if (emit->info.writes_clipvertex) {
698 emit->clip_mode = CLIP_VERTEX;
699 }
700 else if (emit->key.clip_plane_enable && emit->key.last_vertex_stage) {
701 /*
702 * Only the last shader in the vertex processing stage needs to
703 * handle the legacy clip mode.
704 */
705 emit->clip_mode = CLIP_LEGACY;
706 }
707 else {
708 emit->clip_mode = CLIP_NONE;
709 }
710 }
711
712
713 /**
714 * For clip distance register declarations and clip distance register
715 * writes we need to mask the declaration usage or instruction writemask
716 * (respectively) against the set of really-enabled clipping planes.
717 *
718 * The piglit test spec/glsl-1.30/execution/clipping/vs-clip-distance-enables
719 * has a VS that writes to all 8 clip distance registers, but the plane enable
720 * flags are a subset of that.
721 *
722 * This function is used to apply the plane enable flags to the register
723 * declaration or instruction writemask.
724 *
725 * \param writemask the declaration usage mask or instruction writemask
726 * \param clip_reg_index which clip plane register is being declared/written.
727 * The legal values are 0 and 1 (two clip planes per
728 * register, for a total of 8 clip planes)
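 *
 * For example, with emit->key.clip_plane_enable == 0x3f (planes 0..5
 * enabled) and clip_reg_index == 1, only the x/y components (planes 4
 * and 5) of the writemask are kept.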
729 */
730 static unsigned
731 apply_clip_plane_mask(struct svga_shader_emitter_v10 *emit,
732 unsigned writemask, unsigned clip_reg_index)
733 {
734 unsigned shift;
735
736 assert(clip_reg_index < 2);
737
738 /* four clip planes per clip register: */
739 shift = clip_reg_index * 4;
740 writemask &= ((emit->key.clip_plane_enable >> shift) & 0xf);
741
742 return writemask;
743 }
744
745
746 /**
747 * Translate gallium shader type into VGPU10 type.
748 */
749 static VGPU10_PROGRAM_TYPE
750 translate_shader_type(unsigned type)
751 {
752 switch (type) {
753 case PIPE_SHADER_VERTEX:
754 return VGPU10_VERTEX_SHADER;
755 case PIPE_SHADER_GEOMETRY:
756 return VGPU10_GEOMETRY_SHADER;
757 case PIPE_SHADER_FRAGMENT:
758 return VGPU10_PIXEL_SHADER;
759 case PIPE_SHADER_TESS_CTRL:
760 return VGPU10_HULL_SHADER;
761 case PIPE_SHADER_TESS_EVAL:
762 return VGPU10_DOMAIN_SHADER;
763 case PIPE_SHADER_COMPUTE:
764 return VGPU10_COMPUTE_SHADER;
765 default:
766 assert(!"Unexpected shader type");
767 return VGPU10_VERTEX_SHADER;
768 }
769 }
770
771
772 /**
773 * Translate a TGSI_OPCODE_x into a VGPU10_OPCODE_x
774 * Note: we only need to translate the opcodes for "simple" instructions,
775 * as seen below. All other opcodes are handled/translated specially.
776 */
777 static VGPU10_OPCODE_TYPE
778 translate_opcode(enum tgsi_opcode opcode)
779 {
780 switch (opcode) {
781 case TGSI_OPCODE_MOV:
782 return VGPU10_OPCODE_MOV;
783 case TGSI_OPCODE_MUL:
784 return VGPU10_OPCODE_MUL;
785 case TGSI_OPCODE_ADD:
786 return VGPU10_OPCODE_ADD;
787 case TGSI_OPCODE_DP3:
788 return VGPU10_OPCODE_DP3;
789 case TGSI_OPCODE_DP4:
790 return VGPU10_OPCODE_DP4;
791 case TGSI_OPCODE_MIN:
792 return VGPU10_OPCODE_MIN;
793 case TGSI_OPCODE_MAX:
794 return VGPU10_OPCODE_MAX;
795 case TGSI_OPCODE_MAD:
796 return VGPU10_OPCODE_MAD;
797 case TGSI_OPCODE_SQRT:
798 return VGPU10_OPCODE_SQRT;
799 case TGSI_OPCODE_FRC:
800 return VGPU10_OPCODE_FRC;
801 case TGSI_OPCODE_FLR:
802 return VGPU10_OPCODE_ROUND_NI;
803 case TGSI_OPCODE_FSEQ:
804 return VGPU10_OPCODE_EQ;
805 case TGSI_OPCODE_FSGE:
806 return VGPU10_OPCODE_GE;
807 case TGSI_OPCODE_FSNE:
808 return VGPU10_OPCODE_NE;
809 case TGSI_OPCODE_DDX:
810 return VGPU10_OPCODE_DERIV_RTX;
811 case TGSI_OPCODE_DDY:
812 return VGPU10_OPCODE_DERIV_RTY;
813 case TGSI_OPCODE_RET:
814 return VGPU10_OPCODE_RET;
815 case TGSI_OPCODE_DIV:
816 return VGPU10_OPCODE_DIV;
817 case TGSI_OPCODE_IDIV:
818 return VGPU10_OPCODE_VMWARE;
819 case TGSI_OPCODE_DP2:
820 return VGPU10_OPCODE_DP2;
821 case TGSI_OPCODE_BRK:
822 return VGPU10_OPCODE_BREAK;
823 case TGSI_OPCODE_IF:
824 return VGPU10_OPCODE_IF;
825 case TGSI_OPCODE_ELSE:
826 return VGPU10_OPCODE_ELSE;
827 case TGSI_OPCODE_ENDIF:
828 return VGPU10_OPCODE_ENDIF;
829 case TGSI_OPCODE_CEIL:
830 return VGPU10_OPCODE_ROUND_PI;
831 case TGSI_OPCODE_I2F:
832 return VGPU10_OPCODE_ITOF;
833 case TGSI_OPCODE_NOT:
834 return VGPU10_OPCODE_NOT;
835 case TGSI_OPCODE_TRUNC:
836 return VGPU10_OPCODE_ROUND_Z;
837 case TGSI_OPCODE_SHL:
838 return VGPU10_OPCODE_ISHL;
839 case TGSI_OPCODE_AND:
840 return VGPU10_OPCODE_AND;
841 case TGSI_OPCODE_OR:
842 return VGPU10_OPCODE_OR;
843 case TGSI_OPCODE_XOR:
844 return VGPU10_OPCODE_XOR;
845 case TGSI_OPCODE_CONT:
846 return VGPU10_OPCODE_CONTINUE;
847 case TGSI_OPCODE_EMIT:
848 return VGPU10_OPCODE_EMIT;
849 case TGSI_OPCODE_ENDPRIM:
850 return VGPU10_OPCODE_CUT;
851 case TGSI_OPCODE_BGNLOOP:
852 return VGPU10_OPCODE_LOOP;
853 case TGSI_OPCODE_ENDLOOP:
854 return VGPU10_OPCODE_ENDLOOP;
855 case TGSI_OPCODE_ENDSUB:
856 return VGPU10_OPCODE_RET;
857 case TGSI_OPCODE_NOP:
858 return VGPU10_OPCODE_NOP;
859 case TGSI_OPCODE_END:
860 return VGPU10_OPCODE_RET;
861 case TGSI_OPCODE_F2I:
862 return VGPU10_OPCODE_FTOI;
863 case TGSI_OPCODE_IMAX:
864 return VGPU10_OPCODE_IMAX;
865 case TGSI_OPCODE_IMIN:
866 return VGPU10_OPCODE_IMIN;
867 case TGSI_OPCODE_UDIV:
868 case TGSI_OPCODE_UMOD:
869 case TGSI_OPCODE_MOD:
870 return VGPU10_OPCODE_UDIV;
871 case TGSI_OPCODE_IMUL_HI:
872 return VGPU10_OPCODE_IMUL;
873 case TGSI_OPCODE_INEG:
874 return VGPU10_OPCODE_INEG;
875 case TGSI_OPCODE_ISHR:
876 return VGPU10_OPCODE_ISHR;
877 case TGSI_OPCODE_ISGE:
878 return VGPU10_OPCODE_IGE;
879 case TGSI_OPCODE_ISLT:
880 return VGPU10_OPCODE_ILT;
881 case TGSI_OPCODE_F2U:
882 return VGPU10_OPCODE_FTOU;
883 case TGSI_OPCODE_UADD:
884 return VGPU10_OPCODE_IADD;
885 case TGSI_OPCODE_U2F:
886 return VGPU10_OPCODE_UTOF;
887 case TGSI_OPCODE_UCMP:
888 return VGPU10_OPCODE_MOVC;
889 case TGSI_OPCODE_UMAD:
890 return VGPU10_OPCODE_UMAD;
891 case TGSI_OPCODE_UMAX:
892 return VGPU10_OPCODE_UMAX;
893 case TGSI_OPCODE_UMIN:
894 return VGPU10_OPCODE_UMIN;
895 case TGSI_OPCODE_UMUL:
896 case TGSI_OPCODE_UMUL_HI:
897 return VGPU10_OPCODE_UMUL;
898 case TGSI_OPCODE_USEQ:
899 return VGPU10_OPCODE_IEQ;
900 case TGSI_OPCODE_USGE:
901 return VGPU10_OPCODE_UGE;
902 case TGSI_OPCODE_USHR:
903 return VGPU10_OPCODE_USHR;
904 case TGSI_OPCODE_USLT:
905 return VGPU10_OPCODE_ULT;
906 case TGSI_OPCODE_USNE:
907 return VGPU10_OPCODE_INE;
908 case TGSI_OPCODE_SWITCH:
909 return VGPU10_OPCODE_SWITCH;
910 case TGSI_OPCODE_CASE:
911 return VGPU10_OPCODE_CASE;
912 case TGSI_OPCODE_DEFAULT:
913 return VGPU10_OPCODE_DEFAULT;
914 case TGSI_OPCODE_ENDSWITCH:
915 return VGPU10_OPCODE_ENDSWITCH;
916 case TGSI_OPCODE_FSLT:
917 return VGPU10_OPCODE_LT;
918 case TGSI_OPCODE_ROUND:
919 return VGPU10_OPCODE_ROUND_NE;
920 /* Begin SM5 opcodes */
921 case TGSI_OPCODE_F2D:
922 return VGPU10_OPCODE_FTOD;
923 case TGSI_OPCODE_D2F:
924 return VGPU10_OPCODE_DTOF;
925 case TGSI_OPCODE_DMUL:
926 return VGPU10_OPCODE_DMUL;
927 case TGSI_OPCODE_DADD:
928 return VGPU10_OPCODE_DADD;
929 case TGSI_OPCODE_DMAX:
930 return VGPU10_OPCODE_DMAX;
931 case TGSI_OPCODE_DMIN:
932 return VGPU10_OPCODE_DMIN;
933 case TGSI_OPCODE_DSEQ:
934 return VGPU10_OPCODE_DEQ;
935 case TGSI_OPCODE_DSGE:
936 return VGPU10_OPCODE_DGE;
937 case TGSI_OPCODE_DSLT:
938 return VGPU10_OPCODE_DLT;
939 case TGSI_OPCODE_DSNE:
940 return VGPU10_OPCODE_DNE;
941 case TGSI_OPCODE_IBFE:
942 return VGPU10_OPCODE_IBFE;
943 case TGSI_OPCODE_UBFE:
944 return VGPU10_OPCODE_UBFE;
945 case TGSI_OPCODE_BFI:
946 return VGPU10_OPCODE_BFI;
947 case TGSI_OPCODE_BREV:
948 return VGPU10_OPCODE_BFREV;
949 case TGSI_OPCODE_POPC:
950 return VGPU10_OPCODE_COUNTBITS;
951 case TGSI_OPCODE_LSB:
952 return VGPU10_OPCODE_FIRSTBIT_LO;
953 case TGSI_OPCODE_IMSB:
954 return VGPU10_OPCODE_FIRSTBIT_SHI;
955 case TGSI_OPCODE_UMSB:
956 return VGPU10_OPCODE_FIRSTBIT_HI;
957 case TGSI_OPCODE_INTERP_CENTROID:
958 return VGPU10_OPCODE_EVAL_CENTROID;
959 case TGSI_OPCODE_INTERP_SAMPLE:
960 return VGPU10_OPCODE_EVAL_SAMPLE_INDEX;
961 case TGSI_OPCODE_BARRIER:
962 return VGPU10_OPCODE_SYNC;
963
964 /* DX11.1 Opcodes */
965 case TGSI_OPCODE_DDIV:
966 return VGPU10_OPCODE_DDIV;
967 case TGSI_OPCODE_DRCP:
968 return VGPU10_OPCODE_DRCP;
969 case TGSI_OPCODE_D2I:
970 return VGPU10_OPCODE_DTOI;
971 case TGSI_OPCODE_D2U:
972 return VGPU10_OPCODE_DTOU;
973 case TGSI_OPCODE_I2D:
974 return VGPU10_OPCODE_ITOD;
975 case TGSI_OPCODE_U2D:
976 return VGPU10_OPCODE_UTOD;
977
978 case TGSI_OPCODE_SAMPLE_POS:
979 /* Note: we never actually get this opcode because there's no GLSL
980 * function to query multisample resource sample positions. There's
981 * only the TGSI_SEMANTIC_SAMPLEPOS system value which contains the
982 * position of the current sample in the render target.
983 */
984 /* FALL-THROUGH */
985 case TGSI_OPCODE_SAMPLE_INFO:
986 /* NOTE: we never actually get this opcode because the GLSL compiler
987 * implements the gl_NumSamples variable with a simple constant in the
988 * constant buffer.
989 */
990 /* FALL-THROUGH */
991 default:
992 assert(!"Unexpected TGSI opcode in translate_opcode()");
993 return VGPU10_OPCODE_NOP;
994 }
995 }
996
997
998 /**
999 * Translate a TGSI register file type into a VGPU10 operand type.
1000 * \param array is the TGSI_FILE_TEMPORARY register an array?
1001 */
1002 static VGPU10_OPERAND_TYPE
1003 translate_register_file(enum tgsi_file_type file, boolean array)
1004 {
1005 switch (file) {
1006 case TGSI_FILE_CONSTANT:
1007 return VGPU10_OPERAND_TYPE_CONSTANT_BUFFER;
1008 case TGSI_FILE_INPUT:
1009 return VGPU10_OPERAND_TYPE_INPUT;
1010 case TGSI_FILE_OUTPUT:
1011 return VGPU10_OPERAND_TYPE_OUTPUT;
1012 case TGSI_FILE_TEMPORARY:
1013 return array ? VGPU10_OPERAND_TYPE_INDEXABLE_TEMP
1014 : VGPU10_OPERAND_TYPE_TEMP;
1015 case TGSI_FILE_IMMEDIATE:
1016 /* all immediates are 32-bit values at this time, so
1017 * VGPU10_OPERAND_TYPE_IMMEDIATE64 is not needed.
1018 */
1019 return VGPU10_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER;
1020 case TGSI_FILE_SAMPLER:
1021 return VGPU10_OPERAND_TYPE_SAMPLER;
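/* TGSI system values are generally remapped to VGPU10 input registers
 * (see system_value_indexes in emit_src_register()), hence the INPUT
 * operand type here.
 */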
1022 case TGSI_FILE_SYSTEM_VALUE:
1023 return VGPU10_OPERAND_TYPE_INPUT;
1024
1025 /* XXX TODO more cases to finish */
1026
1027 default:
1028 assert(!"Bad tgsi register file!");
1029 return VGPU10_OPERAND_TYPE_NULL;
1030 }
1031 }
1032
1033
1034 /**
1035 * Emit a null dst register
1036 */
1037 static void
1038 emit_null_dst_register(struct svga_shader_emitter_v10 *emit)
1039 {
1040 VGPU10OperandToken0 operand;
1041
1042 operand.value = 0;
1043 operand.operandType = VGPU10_OPERAND_TYPE_NULL;
1044 operand.numComponents = VGPU10_OPERAND_0_COMPONENT;
1045
1046 emit_dword(emit, operand.value);
1047 }
1048
1049
1050 /**
1051 * If the given register is a temporary, return the array ID.
1052 * Else return zero.
1053 */
1054 static unsigned
1055 get_temp_array_id(const struct svga_shader_emitter_v10 *emit,
1056 enum tgsi_file_type file, unsigned index)
1057 {
1058 if (file == TGSI_FILE_TEMPORARY) {
1059 return emit->temp_map[index].arrayId;
1060 }
1061 else {
1062 return 0;
1063 }
1064 }
1065
1066
1067 /**
1068 * If the given register is a temporary, convert the index from a TGSI
1069 * TEMPORARY index to a VGPU10 temp index.
1070 */
1071 static unsigned
1072 remap_temp_index(const struct svga_shader_emitter_v10 *emit,
1073 enum tgsi_file_type file, unsigned index)
1074 {
1075 if (file == TGSI_FILE_TEMPORARY) {
1076 return emit->temp_map[index].index;
1077 }
1078 else {
1079 return index;
1080 }
1081 }
1082
1083
1084 /**
1085 * Setup the operand0 fields related to indexing (1D, 2D, relative, etc).
1086 * Note: the operandType field must already be initialized.
1087 * \param file the register file being accessed
1088 * \param indirect using indirect addressing of the register file?
1089 * \param index2D if true, 2-D indexing is being used (const or temp registers)
1090 * \param indirect2D if true, 2-D indirect indexing being used (for const buf)
1091 */
1092 static VGPU10OperandToken0
1093 setup_operand0_indexing(struct svga_shader_emitter_v10 *emit,
1094 VGPU10OperandToken0 operand0,
1095 enum tgsi_file_type file,
1096 boolean indirect,
1097 boolean index2D, bool indirect2D)
1098 {
1099 VGPU10_OPERAND_INDEX_REPRESENTATION index0Rep, index1Rep;
1100 VGPU10_OPERAND_INDEX_DIMENSION indexDim;
1101
1102 /*
1103 * Compute index dimensions
1104 */
1105 if (operand0.operandType == VGPU10_OPERAND_TYPE_IMMEDIATE32 ||
1106 operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID ||
1107 operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID ||
1108 operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_ID ||
1109 operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP ||
1110 operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID) {
1111 /* there's no swizzle for in-line immediates */
1112 indexDim = VGPU10_OPERAND_INDEX_0D;
1113 assert(operand0.selectionMode == 0);
1114 }
1115 else if (operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT) {
1116 indexDim = VGPU10_OPERAND_INDEX_0D;
1117 }
1118 else {
1119 indexDim = index2D ? VGPU10_OPERAND_INDEX_2D : VGPU10_OPERAND_INDEX_1D;
1120 }
1121
1122 /*
1123 * Compute index representation(s) (immediate vs relative).
1124 */
1125 if (indexDim == VGPU10_OPERAND_INDEX_2D) {
1126 index0Rep = indirect2D ? VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE
1127 : VGPU10_OPERAND_INDEX_IMMEDIATE32;
1128
1129 index1Rep = indirect ? VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE
1130 : VGPU10_OPERAND_INDEX_IMMEDIATE32;
1131 }
1132 else if (indexDim == VGPU10_OPERAND_INDEX_1D) {
1133 index0Rep = indirect ? VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE
1134 : VGPU10_OPERAND_INDEX_IMMEDIATE32;
1135
1136 index1Rep = 0;
1137 }
1138 else {
1139 index0Rep = 0;
1140 index1Rep = 0;
1141 }
1142
1143 operand0.indexDimension = indexDim;
1144 operand0.index0Representation = index0Rep;
1145 operand0.index1Representation = index1Rep;
1146
1147 return operand0;
1148 }
1149
1150
1151 /**
1152 * Emit the operand for expressing an address register for indirect indexing.
1153 * Note that the address register is really just a temp register.
1154 * \param addr_reg_index which address register to use
1155 */
1156 static void
1157 emit_indirect_register(struct svga_shader_emitter_v10 *emit,
1158 unsigned addr_reg_index)
1159 {
1160 unsigned tmp_reg_index;
1161 VGPU10OperandToken0 operand0;
1162
1163 assert(addr_reg_index < MAX_VGPU10_ADDR_REGS);
1164
1165 tmp_reg_index = emit->address_reg_index[addr_reg_index];
1166
1167 /* operand0 is a simple temporary register, selecting one component */
1168 operand0.value = 0;
1169 operand0.operandType = VGPU10_OPERAND_TYPE_TEMP;
1170 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1171 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
1172 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
1173 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
1174 operand0.swizzleX = 0;
1175 operand0.swizzleY = 1;
1176 operand0.swizzleZ = 2;
1177 operand0.swizzleW = 3;
1178
1179 emit_dword(emit, operand0.value);
1180 emit_dword(emit, remap_temp_index(emit, TGSI_FILE_TEMPORARY, tmp_reg_index));
1181 }
1182
1183
1184 /**
1185 * Translate the dst register of a TGSI instruction and emit VGPU10 tokens.
1186 * \param emit the emitter context
1187 * \param reg the TGSI dst register to translate
1188 */
1189 static void
1190 emit_dst_register(struct svga_shader_emitter_v10 *emit,
1191 const struct tgsi_full_dst_register *reg)
1192 {
1193 enum tgsi_file_type file = reg->Register.File;
1194 unsigned index = reg->Register.Index;
1195 const enum tgsi_semantic sem_name = emit->info.output_semantic_name[index];
1196 const unsigned sem_index = emit->info.output_semantic_index[index];
1197 unsigned writemask = reg->Register.WriteMask;
1198 const boolean indirect = reg->Register.Indirect;
1199 unsigned tempArrayId = get_temp_array_id(emit, file, index);
1200 boolean index2d = reg->Register.Dimension || tempArrayId > 0;
1201 VGPU10OperandToken0 operand0;
1202
1203 if (file == TGSI_FILE_TEMPORARY) {
1204 emit->temp_map[index].initialized = TRUE;
1205 }
1206
1207 if (file == TGSI_FILE_OUTPUT) {
1208 if (emit->unit == PIPE_SHADER_VERTEX ||
1209 emit->unit == PIPE_SHADER_GEOMETRY ||
1210 emit->unit == PIPE_SHADER_TESS_EVAL) {
1211 if (index == emit->vposition.out_index &&
1212 emit->vposition.tmp_index != INVALID_INDEX) {
1213 /* replace OUTPUT[POS] with TEMP[POS]. We need to store the
1214 * vertex position result in a temporary so that we can modify
1215 * it in the post_helper() code.
1216 */
1217 file = TGSI_FILE_TEMPORARY;
1218 index = emit->vposition.tmp_index;
1219 }
1220 else if (sem_name == TGSI_SEMANTIC_CLIPDIST &&
1221 emit->clip_dist_tmp_index != INVALID_INDEX) {
1222 /* replace OUTPUT[CLIPDIST] with TEMP[CLIPDIST].
1223 * We store the clip distance in a temporary first, then
1224 * we'll copy it to the shadow copy and to CLIPDIST with the
1225 * enabled planes mask in emit_clip_distance_instructions().
1226 */
1227 file = TGSI_FILE_TEMPORARY;
1228 index = emit->clip_dist_tmp_index + sem_index;
1229 }
1230 else if (sem_name == TGSI_SEMANTIC_CLIPVERTEX &&
1231 emit->clip_vertex_tmp_index != INVALID_INDEX) {
1232 /* replace the CLIPVERTEX output register with a temporary */
1233 assert(emit->clip_mode == CLIP_VERTEX);
1234 assert(sem_index == 0);
1235 file = TGSI_FILE_TEMPORARY;
1236 index = emit->clip_vertex_tmp_index;
1237 }
1238 else if (sem_name == TGSI_SEMANTIC_COLOR &&
1239 emit->key.clamp_vertex_color) {
1240
1241 /* set the saturate modifier of the instruction
1242 * to clamp the vertex color.
1243 */
1244 VGPU10OpcodeToken0 *token =
1245 (VGPU10OpcodeToken0 *)emit->buf + emit->inst_start_token;
1246 token->saturate = TRUE;
1247 }
1248 else if (sem_name == TGSI_SEMANTIC_VIEWPORT_INDEX &&
1249 emit->gs.viewport_index_out_index != INVALID_INDEX) {
1250 file = TGSI_FILE_TEMPORARY;
1251 index = emit->gs.viewport_index_tmp_index;
1252 }
1253 }
1254 else if (emit->unit == PIPE_SHADER_FRAGMENT) {
1255 if (sem_name == TGSI_SEMANTIC_POSITION) {
1256 /* Fragment depth output register */
1257 operand0.value = 0;
1258 operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_DEPTH;
1259 operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
1260 operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
1261 emit_dword(emit, operand0.value);
1262 return;
1263 }
1264 else if (sem_name == TGSI_SEMANTIC_SAMPLEMASK) {
1265 /* Fragment sample mask output */
1266 operand0.value = 0;
1267 operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK;
1268 operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
1269 operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
1270 emit_dword(emit, operand0.value);
1271 return;
1272 }
1273 else if (index == emit->fs.color_out_index[0] &&
1274 emit->fs.color_tmp_index != INVALID_INDEX) {
1275 /* replace OUTPUT[COLOR] with TEMP[COLOR]. We need to store the
1276 * fragment color result in a temporary so that we can read it
1277 * in the post_helper() code.
1278 */
1279 file = TGSI_FILE_TEMPORARY;
1280 index = emit->fs.color_tmp_index;
1281 }
1282 else {
1283 /* Typically, for fragment shaders, the output register index
1284 * matches the color semantic index. But not when we write to
1285 * the fragment depth register. In that case, OUT[0] will be
1286 * fragdepth and OUT[1] will be the 0th color output. We need
1287 * to use the semantic index for color outputs.
1288 */
1289 assert(sem_name == TGSI_SEMANTIC_COLOR);
1290 index = emit->info.output_semantic_index[index];
1291
1292 emit->num_output_writes++;
1293 }
1294 }
1295 else if (emit->unit == PIPE_SHADER_TESS_CTRL) {
1296 if (index == emit->tcs.inner.tgsi_index) {
1297 /* replace OUTPUT[TESSLEVEL] with temp. We store it in a
1298 * temporary for now so that it can be stored into the appropriate
1299 * registers in post_helper() in the patch constant phase.
1300 */
1301 if (emit->tcs.control_point_phase) {
1302 /* Discard writing into tessfactor in control point phase */
1303 emit->discard_instruction = TRUE;
1304 }
1305 else {
1306 file = TGSI_FILE_TEMPORARY;
1307 index = emit->tcs.inner.temp_index;
1308 }
1309 }
1310 else if (index == emit->tcs.outer.tgsi_index) {
1311 /* replace OUTPUT[TESSLEVEL] with temp. We store it in a
1312 * temporary for now so that it can be stored into the appropriate
1313 * registers in post_helper().
1314 */
1315 if (emit->tcs.control_point_phase) {
1316 /* Discard writing into tessfactor in control point phase */
1317 emit->discard_instruction = TRUE;
1318 }
1319 else {
1320 file = TGSI_FILE_TEMPORARY;
1321 index = emit->tcs.outer.temp_index;
1322 }
1323 }
1324 else if (index >= emit->tcs.patch_generic_out_index &&
1325 index < (emit->tcs.patch_generic_out_index +
1326 emit->tcs.patch_generic_out_count)) {
1327 if (emit->tcs.control_point_phase) {
1328 /* Discard writing into generic patch constant outputs in
1329 control point phase */
1330 emit->discard_instruction = TRUE;
1331 }
1332 else {
1333 if (emit->reemit_instruction) {
1334 /* Store results of reemitted instruction in temporary register. */
1335 file = TGSI_FILE_TEMPORARY;
1336 index = emit->tcs.patch_generic_tmp_index +
1337 (index - emit->tcs.patch_generic_out_index);
1338 /**
1339 * Temporaries for patch constant data can be declared
1340 * as indexable temporaries.
1341 */
1342 tempArrayId = get_temp_array_id(emit, file, index);
1343 index2d = tempArrayId > 0;
1344
1345 emit->reemit_instruction = FALSE;
1346 }
1347 else {
1348 /* If per-patch outputs are read in the shader, we
1349 * reemit the instruction and store the results in temporaries
1350 * in the patch constant phase. */
1351 if (emit->info.reads_perpatch_outputs) {
1352 emit->reemit_instruction = TRUE;
1353 }
1354 }
1355 }
1356 }
1357 else if (reg->Register.Dimension) {
1358 /* Only control point outputs are declared 2D in tgsi */
1359 if (emit->tcs.control_point_phase) {
1360 if (emit->reemit_instruction) {
1361 /* Store results of reemitted instruction in temporary register. */
1362 index2d = FALSE;
1363 file = TGSI_FILE_TEMPORARY;
1364 index = emit->tcs.control_point_tmp_index +
1365 (index - emit->tcs.control_point_out_index);
1366 emit->reemit_instruction = FALSE;
1367 }
1368 else {
1369 /* The mapped control point outputs are 1-D */
1370 index2d = FALSE;
1371 if (emit->info.reads_pervertex_outputs) {
1372 /* If per-vertex outputs are read in the shader, we
1373 * reemit the instruction and store the results in temporaries
1374 * in the control point phase. */
1375 emit->reemit_instruction = TRUE;
1376 }
1377 }
1378
1379 if (sem_name == TGSI_SEMANTIC_CLIPDIST &&
1380 emit->clip_dist_tmp_index != INVALID_INDEX) {
1381 /* replace OUTPUT[CLIPDIST] with TEMP[CLIPDIST].
1382 * We store the clip distance in a temporary first, then
1383 * we'll copy it to the shadow copy and to CLIPDIST with the
1384 * enabled planes mask in emit_clip_distance_instructions().
1385 */
1386 file = TGSI_FILE_TEMPORARY;
1387 index = emit->clip_dist_tmp_index + sem_index;
1388 }
1389 else if (sem_name == TGSI_SEMANTIC_CLIPVERTEX &&
1390 emit->clip_vertex_tmp_index != INVALID_INDEX) {
1391 /* replace the CLIPVERTEX output register with a temporary */
1392 assert(emit->clip_mode == CLIP_VERTEX);
1393 assert(sem_index == 0);
1394 file = TGSI_FILE_TEMPORARY;
1395 index = emit->clip_vertex_tmp_index;
1396 }
1397 }
1398 else {
1399 /* Discard writing into control point outputs in
1400 patch constant phase */
1401 emit->discard_instruction = TRUE;
1402 }
1403 }
1404 }
1405 }
1406
1407 /* init operand tokens to all zero */
1408 operand0.value = 0;
1409
1410 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1411
1412 /* the operand has a writemask */
1413 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
1414
1415 /* Which of the four dest components to write to. Note that we can use a
1416 * simple assignment here since TGSI writemasks match VGPU10 writemasks.
1417 */
1418 STATIC_ASSERT(TGSI_WRITEMASK_X == VGPU10_OPERAND_4_COMPONENT_MASK_X);
1419 operand0.mask = writemask;
1420
1421 /* translate TGSI register file type to VGPU10 operand type */
1422 operand0.operandType = translate_register_file(file, tempArrayId > 0);
1423
1424 check_register_index(emit, operand0.operandType, index);
1425
1426 operand0 = setup_operand0_indexing(emit, operand0, file, indirect,
1427 index2d, FALSE);
1428
1429 /* Emit tokens */
1430 emit_dword(emit, operand0.value);
1431 if (tempArrayId > 0) {
1432 emit_dword(emit, tempArrayId);
1433 }
1434
1435 emit_dword(emit, remap_temp_index(emit, file, index));
1436
1437 if (indirect) {
1438 emit_indirect_register(emit, reg->Indirect.Index);
1439 }
1440 }
1441
1442
1443 /**
1444 * Check if a temporary register needs to be initialized. We can only
1445 * tell when the shader does not use indirect addressing of temporaries
1446 * and the temporary is not used inside a loop; with indirect addressing
1447 * or loops we cannot determine whether a temporary is initialized or not.
1448 */
1449 static boolean
1450 need_temp_reg_initialization(struct svga_shader_emitter_v10 *emit,
1451 unsigned index)
1452 {
1453 if (!(emit->info.indirect_files & (1u << TGSI_FILE_TEMPORARY))
1454 && emit->current_loop_depth == 0) {
1455 if (!emit->temp_map[index].initialized &&
1456 emit->temp_map[index].index < emit->num_shader_temps) {
1457 return TRUE;
1458 }
1459 }
1460
1461 return FALSE;
1462 }
1463
1464
1465 /**
1466 * Translate a src register of a TGSI instruction and emit VGPU10 tokens.
1467 * In quite a few cases, we do register substitution. For example, if
1468 * the TGSI register is the front/back-face register, we replace that with
1469 * a temp register containing a value we computed earlier.
1470 */
1471 static void
1472 emit_src_register(struct svga_shader_emitter_v10 *emit,
1473 const struct tgsi_full_src_register *reg)
1474 {
1475 enum tgsi_file_type file = reg->Register.File;
1476 unsigned index = reg->Register.Index;
1477 const boolean indirect = reg->Register.Indirect;
1478 unsigned tempArrayId = get_temp_array_id(emit, file, index);
1479 boolean index2d = (reg->Register.Dimension ||
1480 tempArrayId > 0 ||
1481 file == TGSI_FILE_CONSTANT);
1482 unsigned index2 = tempArrayId > 0 ? tempArrayId : reg->Dimension.Index;
1483 boolean indirect2d = reg->Dimension.Indirect;
1484 unsigned swizzleX = reg->Register.SwizzleX;
1485 unsigned swizzleY = reg->Register.SwizzleY;
1486 unsigned swizzleZ = reg->Register.SwizzleZ;
1487 unsigned swizzleW = reg->Register.SwizzleW;
1488 const boolean absolute = reg->Register.Absolute;
1489 const boolean negate = reg->Register.Negate;
1490 VGPU10OperandToken0 operand0;
1491 VGPU10OperandToken1 operand1;
1492
1493 operand0.value = operand1.value = 0;
1494
1495 if (emit->unit == PIPE_SHADER_FRAGMENT){
1496 if (file == TGSI_FILE_INPUT) {
1497 if (index == emit->fs.face_input_index) {
1498 /* Replace INPUT[FACE] with TEMP[FACE] */
1499 file = TGSI_FILE_TEMPORARY;
1500 index = emit->fs.face_tmp_index;
1501 }
1502 else if (index == emit->fs.fragcoord_input_index) {
1503 /* Replace INPUT[POSITION] with TEMP[POSITION] */
1504 file = TGSI_FILE_TEMPORARY;
1505 index = emit->fs.fragcoord_tmp_index;
1506 }
1507 else if (index == emit->fs.layer_input_index) {
1508 /* Replace INPUT[LAYER] with zero.x */
1509 file = TGSI_FILE_IMMEDIATE;
1510 index = emit->fs.layer_imm_index;
1511 swizzleX = swizzleY = swizzleZ = swizzleW = TGSI_SWIZZLE_X;
1512 }
1513 else {
1514 /* We remap fragment shader inputs so that FS input indexes
1515 * match up with VS/GS output indexes.
1516 */
1517 index = emit->linkage.input_map[index];
1518 }
1519 }
1520 else if (file == TGSI_FILE_SYSTEM_VALUE) {
1521 if (index == emit->fs.sample_pos_sys_index) {
1522 assert(emit->version >= 41);
1523 /* Current sample position is in a temp register */
1524 file = TGSI_FILE_TEMPORARY;
1525 index = emit->fs.sample_pos_tmp_index;
1526 }
1527 else if (index == emit->fs.sample_mask_in_sys_index) {
1528 /* Emitted as vCoverage0.x */
1529 /* According to GLSL spec, the gl_SampleMaskIn array has ceil(s / 32)
1530 * elements where s is the maximum number of color samples supported
1531 * by the implementation. With the current implementation, we should not
1532 * have more than one element, so assert if Index != 0.
1533 */
1534 assert((!reg->Register.Indirect && reg->Register.Index == 0) ||
1535 reg->Register.Indirect);
1536 operand0.value = 0;
1537 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_COVERAGE_MASK;
1538 operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
1539 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1540 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
1541 emit_dword(emit, operand0.value);
1542 return;
1543 }
1544 else {
1545 /* Map the TGSI system value to a VGPU10 input register */
1546 assert(index < ARRAY_SIZE(emit->system_value_indexes));
1547 file = TGSI_FILE_INPUT;
1548 index = emit->system_value_indexes[index];
1549 }
1550 }
1551 }
1552 else if (emit->unit == PIPE_SHADER_GEOMETRY) {
1553 if (file == TGSI_FILE_INPUT) {
1554 if (index == emit->gs.prim_id_index) {
1555 operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
1556 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
1557 }
1558 index = emit->linkage.input_map[index];
1559 }
1560 else if (file == TGSI_FILE_SYSTEM_VALUE &&
1561 index == emit->gs.invocation_id_sys_index) {
1562 /* Emitted as vGSInstanceID0.x */
1563 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1564 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID;
1565 index = 0;
1566 }
1567 }
1568 else if (emit->unit == PIPE_SHADER_VERTEX) {
1569 if (file == TGSI_FILE_INPUT) {
1570 /* if input is adjusted... */
1571 if ((emit->key.vs.adjust_attrib_w_1 |
1572 emit->key.vs.adjust_attrib_itof |
1573 emit->key.vs.adjust_attrib_utof |
1574 emit->key.vs.attrib_is_bgra |
1575 emit->key.vs.attrib_puint_to_snorm |
1576 emit->key.vs.attrib_puint_to_uscaled |
1577 emit->key.vs.attrib_puint_to_sscaled) & (1 << index)) {
1578 file = TGSI_FILE_TEMPORARY;
1579 index = emit->vs.adjusted_input[index];
1580 }
1581 }
1582 else if (file == TGSI_FILE_SYSTEM_VALUE) {
1583 if (index == emit->vs.vertex_id_sys_index &&
1584 emit->vs.vertex_id_tmp_index != INVALID_INDEX) {
1585 file = TGSI_FILE_TEMPORARY;
1586 index = emit->vs.vertex_id_tmp_index;
1587 swizzleX = swizzleY = swizzleZ = swizzleW = TGSI_SWIZZLE_X;
1588 }
1589 else {
1590 /* Map the TGSI system value to a VGPU10 input register */
1591 assert(index < ARRAY_SIZE(emit->system_value_indexes));
1592 file = TGSI_FILE_INPUT;
1593 index = emit->system_value_indexes[index];
1594 }
1595 }
1596 }
1597 else if (emit->unit == PIPE_SHADER_TESS_CTRL) {
1598
1599 if (file == TGSI_FILE_SYSTEM_VALUE) {
1600 if (index == emit->tcs.vertices_per_patch_index) {
1601 /**
1602 * if source register is the system value for vertices_per_patch,
1603 * replace it with the immediate.
1604 */
1605 file = TGSI_FILE_IMMEDIATE;
1606 index = emit->tcs.imm_index;
1607 swizzleX = swizzleY = swizzleZ = swizzleW = TGSI_SWIZZLE_X;
1608 }
1609 else if (index == emit->tcs.invocation_id_sys_index) {
1610 if (emit->tcs.control_point_phase) {
1611 /**
1612 * Emitted as vOutputControlPointID.x
1613 */
1614 operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
1615 operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID;
1616 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
1617 operand0.mask = 0;
1618 emit_dword(emit, operand0.value);
1619 return;
1620 }
1621 else {
1622 /* There is no control point ID input declaration in
1623 * the patch constant phase of the hull shader.
1624 * Since for now we are emitting all instructions in
1625 * the patch constant phase, we are replacing the
1626 * control point ID reference with the immediate 0.
1627 */
1628 file = TGSI_FILE_IMMEDIATE;
1629 index = emit->tcs.imm_index;
1630 swizzleX = swizzleY = swizzleZ = swizzleW = TGSI_SWIZZLE_W;
1631 }
1632 }
1633 else if (index == emit->tcs.prim_id_index) {
1634 /**
1635 * Emitted as vPrim.x
1636 */
1637 operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
1638 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
1639 index = 0;
1640 }
1641 }
1642 else if (file == TGSI_FILE_INPUT) {
1643 index = emit->linkage.input_map[index];
1644 if (!emit->tcs.control_point_phase) {
1645 /* Emitted as vicp */
1646 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1647 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT;
1648 assert(reg->Register.Dimension);
1649 }
1650 }
1651 else if (file == TGSI_FILE_OUTPUT) {
1652 if ((index >= emit->tcs.patch_generic_out_index &&
1653 index < (emit->tcs.patch_generic_out_index +
1654 emit->tcs.patch_generic_out_count)) ||
1655 index == emit->tcs.inner.tgsi_index ||
1656 index == emit->tcs.outer.tgsi_index) {
1657 if (emit->tcs.control_point_phase) {
1658 emit->discard_instruction = TRUE;
1659 }
1660 else {
1661 /* The device doesn't allow reading from an output register,
1662 * so use the corresponding temporary register as the source. */
1663 file = TGSI_FILE_TEMPORARY;
1664 if (index == emit->tcs.inner.tgsi_index) {
1665 index = emit->tcs.inner.temp_index;
1666 }
1667 else if (index == emit->tcs.outer.tgsi_index) {
1668 index = emit->tcs.outer.temp_index;
1669 }
1670 else {
1671 index = emit->tcs.patch_generic_tmp_index +
1672 (index - emit->tcs.patch_generic_out_index);
1673 }
1674
1675 /**
1676 * Temporaries for patch constant data can be declared
1677 * as indexable temporaries.
1678 */
1679 tempArrayId = get_temp_array_id(emit, file, index);
1680 index2d = tempArrayId > 0;
1681 index2 = tempArrayId > 0 ? tempArrayId : reg->Dimension.Index;
1682 }
1683 }
1684 else if (index2d) {
1685 if (emit->tcs.control_point_phase) {
1686 /* The device doesn't allow reading from an output register,
1687 * so use the corresponding temporary register as the source. */
1688 file = TGSI_FILE_TEMPORARY;
1689 index2d = FALSE;
1690 index = emit->tcs.control_point_tmp_index +
1691 (index - emit->tcs.control_point_out_index);
1692 }
1693 else {
1694 emit->discard_instruction = TRUE;
1695 }
1696 }
1697 }
1698 }
1699 else if (emit->unit == PIPE_SHADER_TESS_EVAL) {
1700 if (file == TGSI_FILE_SYSTEM_VALUE) {
1701 if (index == emit->tes.tesscoord_sys_index) {
1702 /**
1703 * Emitted as vDomain
1704 */
1705 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1706 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT;
1707 index = 0;
1708 }
1709 else if (index == emit->tes.inner.tgsi_index) {
1710 file = TGSI_FILE_TEMPORARY;
1711 index = emit->tes.inner.temp_index;
1712 }
1713 else if (index == emit->tes.outer.tgsi_index) {
1714 file = TGSI_FILE_TEMPORARY;
1715 index = emit->tes.outer.temp_index;
1716 }
1717 else if (index == emit->tes.prim_id_index) {
1718 /**
1719 * Emitted as vPrim.x
1720 */
1721 operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
1722 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
1723 index = 0;
1724 }
1725
1726 }
1727 else if (file == TGSI_FILE_INPUT) {
1728 if (index2d) {
1729 /* 2D input is emitted as vcp (input control point). */
1730 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT;
1731 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1732
1733 /* index specifies the element index and is remapped
1734 * to align with the tcs output index.
1735 */
1736 index = emit->linkage.input_map[index];
1737
1738 assert(index2 < emit->key.tes.vertices_per_patch);
1739 }
1740 else {
1741 if (index < emit->key.tes.tessfactor_index)
1742 /* index specifies the generic patch index.
1743 * Remapped to match up with the tcs output index.
1744 */
1745 index = emit->linkage.input_map[index];
1746
1747 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT;
1748 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1749 }
1750 }
1751 }
1752
1753 if (file == TGSI_FILE_ADDRESS) {
1754 index = emit->address_reg_index[index];
1755 file = TGSI_FILE_TEMPORARY;
1756 }
1757
1758 if (file == TGSI_FILE_TEMPORARY) {
1759 if (need_temp_reg_initialization(emit, index)) {
1760 emit->initialize_temp_index = index;
1761 emit->discard_instruction = TRUE;
1762 }
1763 }
1764
1765 if (operand0.value == 0) {
1766 /* if operand0 was not set above for a special case, do the general
1767 * case now.
1768 */
1769 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1770 operand0.operandType = translate_register_file(file, tempArrayId > 0);
1771 }
1772 operand0 = setup_operand0_indexing(emit, operand0, file, indirect,
1773 index2d, indirect2d);
1774
1775 if (operand0.operandType != VGPU10_OPERAND_TYPE_IMMEDIATE32 &&
1776 operand0.operandType != VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID) {
1777 /* there's no swizzle for in-line immediates */
1778 if (swizzleX == swizzleY &&
1779 swizzleX == swizzleZ &&
1780 swizzleX == swizzleW) {
1781 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
1782 }
1783 else {
1784 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
1785 }
1786
1787 operand0.swizzleX = swizzleX;
1788 operand0.swizzleY = swizzleY;
1789 operand0.swizzleZ = swizzleZ;
1790 operand0.swizzleW = swizzleW;
1791
1792 if (absolute || negate) {
1793 operand0.extended = 1;
1794 operand1.extendedOperandType = VGPU10_EXTENDED_OPERAND_MODIFIER;
1795 if (absolute && !negate)
1796 operand1.operandModifier = VGPU10_OPERAND_MODIFIER_ABS;
1797 if (!absolute && negate)
1798 operand1.operandModifier = VGPU10_OPERAND_MODIFIER_NEG;
1799 if (absolute && negate)
1800 operand1.operandModifier = VGPU10_OPERAND_MODIFIER_ABSNEG;
1801 }
1802 }
1803
1804 /* Emit the operand tokens */
1805 emit_dword(emit, operand0.value);
1806 if (operand0.extended)
1807 emit_dword(emit, operand1.value);
1808
1809 if (operand0.operandType == VGPU10_OPERAND_TYPE_IMMEDIATE32) {
1810 /* Emit the four float/int in-line immediate values */
1811 unsigned *c;
1812 assert(index < ARRAY_SIZE(emit->immediates));
1813 assert(file == TGSI_FILE_IMMEDIATE);
1814 assert(swizzleX < 4);
1815 assert(swizzleY < 4);
1816 assert(swizzleZ < 4);
1817 assert(swizzleW < 4);
1818 c = (unsigned *) emit->immediates[index];
1819 emit_dword(emit, c[swizzleX]);
1820 emit_dword(emit, c[swizzleY]);
1821 emit_dword(emit, c[swizzleZ]);
1822 emit_dword(emit, c[swizzleW]);
1823 }
1824 else if (operand0.indexDimension >= VGPU10_OPERAND_INDEX_1D) {
1825 /* Emit the register index(es) */
1826 if (index2d) {
1827 emit_dword(emit, index2);
1828
1829 if (indirect2d) {
1830 emit_indirect_register(emit, reg->DimIndirect.Index);
1831 }
1832 }
1833
1834 emit_dword(emit, remap_temp_index(emit, file, index));
1835
1836 if (indirect) {
1837 emit_indirect_register(emit, reg->Indirect.Index);
1838 }
1839 }
1840 }
1841
1842
1843 /**
1844 * Emit a resource operand (for use with a SAMPLE instruction).
1845 */
1846 static void
1847 emit_resource_register(struct svga_shader_emitter_v10 *emit,
1848 unsigned resource_number)
1849 {
1850 VGPU10OperandToken0 operand0;
1851
1852 check_register_index(emit, VGPU10_OPERAND_TYPE_RESOURCE, resource_number);
1853
1854 /* init */
1855 operand0.value = 0;
1856
1857 operand0.operandType = VGPU10_OPERAND_TYPE_RESOURCE;
1858 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
1859 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1860 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
1861 operand0.swizzleX = VGPU10_COMPONENT_X;
1862 operand0.swizzleY = VGPU10_COMPONENT_Y;
1863 operand0.swizzleZ = VGPU10_COMPONENT_Z;
1864 operand0.swizzleW = VGPU10_COMPONENT_W;
1865
1866 emit_dword(emit, operand0.value);
1867 emit_dword(emit, resource_number);
1868 }
1869
1870
1871 /**
1872 * Emit a sampler operand (for use with a SAMPLE instruction).
1873 */
1874 static void
1875 emit_sampler_register(struct svga_shader_emitter_v10 *emit,
1876 unsigned sampler_number)
1877 {
1878 VGPU10OperandToken0 operand0;
1879
1880 check_register_index(emit, VGPU10_OPERAND_TYPE_SAMPLER, sampler_number);
1881
1882 /* init */
1883 operand0.value = 0;
1884
1885 operand0.operandType = VGPU10_OPERAND_TYPE_SAMPLER;
1886 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
1887
1888 emit_dword(emit, operand0.value);
1889 emit_dword(emit, sampler_number);
1890 }
1891
1892
1893 /**
1894 * Emit an operand which reads the IS_FRONT_FACING register.
1895 */
1896 static void
1897 emit_face_register(struct svga_shader_emitter_v10 *emit)
1898 {
1899 VGPU10OperandToken0 operand0;
1900 unsigned index = emit->linkage.input_map[emit->fs.face_input_index];
1901
1902 /* init */
1903 operand0.value = 0;
1904
1905 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT;
1906 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
1907 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
1908 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1909
1910 operand0.swizzleX = VGPU10_COMPONENT_X;
1911 operand0.swizzleY = VGPU10_COMPONENT_X;
1912 operand0.swizzleZ = VGPU10_COMPONENT_X;
1913 operand0.swizzleW = VGPU10_COMPONENT_X;
1914
1915 emit_dword(emit, operand0.value);
1916 emit_dword(emit, index);
1917 }
1918
1919
1920 /**
1921 * Emit tokens for the "rasterizer" register used by the SAMPLE_POS
1922 * instruction.
1923 */
1924 static void
1925 emit_rasterizer_register(struct svga_shader_emitter_v10 *emit)
1926 {
1927 VGPU10OperandToken0 operand0;
1928
1929 /* init */
1930 operand0.value = 0;
1931
1932 /* No register index for the rasterizer (there's only one) */
1933 operand0.operandType = VGPU10_OPERAND_TYPE_RASTERIZER;
1934 operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
1935 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1936 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
1937 operand0.swizzleX = VGPU10_COMPONENT_X;
1938 operand0.swizzleY = VGPU10_COMPONENT_Y;
1939 operand0.swizzleZ = VGPU10_COMPONENT_Z;
1940 operand0.swizzleW = VGPU10_COMPONENT_W;
1941
1942 emit_dword(emit, operand0.value);
1943 }
1944
1945
1946 /**
1947 * Emit tokens for the "stream" register used by the
1948 * DCL_STREAM, CUT_STREAM, EMIT_STREAM instructions.
1949 */
1950 static void
1951 emit_stream_register(struct svga_shader_emitter_v10 *emit, unsigned index)
1952 {
1953 VGPU10OperandToken0 operand0;
1954
1955 /* init */
1956 operand0.value = 0;
1957
1958 /* The stream register is addressed with a 1D index (emitted below) */
1959 operand0.operandType = VGPU10_OPERAND_TYPE_STREAM;
1960 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
1961 operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
1962
1963 emit_dword(emit, operand0.value);
1964 emit_dword(emit, index);
1965 }
1966
1967
1968 /**
1969 * Emit the token for a VGPU10 opcode, with precise parameter.
1970 * \param saturate clamp result to [0,1]?
1971 */
1972 static void
1973 emit_opcode_precise(struct svga_shader_emitter_v10 *emit,
1974 unsigned vgpu10_opcode, boolean saturate, boolean precise)
1975 {
1976 VGPU10OpcodeToken0 token0;
1977
1978 token0.value = 0; /* init all fields to zero */
1979 token0.opcodeType = vgpu10_opcode;
1980 token0.instructionLength = 0; /* Filled in by end_emit_instruction() */
1981 token0.saturate = saturate;
1982
1983 /* Mesa's GLSL IR -> TGSI translator will set the TGSI precise flag for
1984 * 'invariant' declarations. Only set preciseValues=1 if we have SM5.
1985 */
1986 token0.preciseValues = precise && emit->version >= 50;
1987
1988 emit_dword(emit, token0.value);
1989
1990 emit->uses_precise_qualifier |= token0.preciseValues;
1991 }
1992
1993
1994 /**
1995 * Emit the token for a VGPU10 opcode.
1996 * \param saturate clamp result to [0,1]?
1997 */
1998 static void
1999 emit_opcode(struct svga_shader_emitter_v10 *emit,
2000 unsigned vgpu10_opcode, boolean saturate)
2001 {
2002 emit_opcode_precise(emit, vgpu10_opcode, saturate, FALSE);
2003 }
2004
2005
2006 /**
2007 * Emit the token for a VGPU10 resinfo instruction.
2008 * \param modifier return type modifier, _uint or _rcpFloat.
2009 * TODO: We may want to remove this parameter if it will
2010 * only ever be used as _uint.
2011 */
2012 static void
2013 emit_opcode_resinfo(struct svga_shader_emitter_v10 *emit,
2014 VGPU10_RESINFO_RETURN_TYPE modifier)
2015 {
2016 VGPU10OpcodeToken0 token0;
2017
2018 token0.value = 0; /* init all fields to zero */
2019 token0.opcodeType = VGPU10_OPCODE_RESINFO;
2020 token0.instructionLength = 0; /* Filled in by end_emit_instruction() */
2021 token0.resinfoReturnType = modifier;
2022
2023 emit_dword(emit, token0.value);
2024 }
2025
2026
2027 /**
2028 * Emit opcode tokens for a texture sample instruction. Texture instructions
2029 * can be rather complicated (texel offsets, etc.), so we have this specialized
2030 * function.
2031 */
2032 static void
2033 emit_sample_opcode(struct svga_shader_emitter_v10 *emit,
2034 unsigned vgpu10_opcode, boolean saturate,
2035 const int offsets[3])
2036 {
2037 VGPU10OpcodeToken0 token0;
2038 VGPU10OpcodeToken1 token1;
2039
2040 token0.value = 0; /* init all fields to zero */
2041 token0.opcodeType = vgpu10_opcode;
2042 token0.instructionLength = 0; /* Filled in by end_emit_instruction() */
2043 token0.saturate = saturate;
2044
2045 if (offsets[0] || offsets[1] || offsets[2]) {
2046 assert(offsets[0] >= VGPU10_MIN_TEXEL_FETCH_OFFSET);
2047 assert(offsets[1] >= VGPU10_MIN_TEXEL_FETCH_OFFSET);
2048 assert(offsets[2] >= VGPU10_MIN_TEXEL_FETCH_OFFSET);
2049 assert(offsets[0] <= VGPU10_MAX_TEXEL_FETCH_OFFSET);
2050 assert(offsets[1] <= VGPU10_MAX_TEXEL_FETCH_OFFSET);
2051 assert(offsets[2] <= VGPU10_MAX_TEXEL_FETCH_OFFSET);
2052
2053 token0.extended = 1;
2054 token1.value = 0;
2055 token1.opcodeType = VGPU10_EXTENDED_OPCODE_SAMPLE_CONTROLS;
2056 token1.offsetU = offsets[0];
2057 token1.offsetV = offsets[1];
2058 token1.offsetW = offsets[2];
2059 }
2060
2061 emit_dword(emit, token0.value);
2062 if (token0.extended) {
2063 emit_dword(emit, token1.value);
2064 }
2065 }
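
/* Illustrative emission sequence for a SAMPLE-style instruction, where
 * 'offsets' and 'unit' stand in for the caller's texel offsets and
 * sampler unit.  The destination and coordinate operands are emitted
 * by other helpers in this file and are elided ("...") here:
 *
 *    begin_emit_instruction(emit);
 *    emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE, FALSE, offsets);
 *    ... emit the destination and texcoord operands ...
 *    emit_resource_register(emit, unit);
 *    emit_sampler_register(emit, unit);
 *    end_emit_instruction(emit);
 */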
2066
2067
2068 /**
2069 * Emit a DISCARD opcode token.
2070 * If nonzero is set, we'll discard the fragment if the X component is not 0.
2071 * Otherwise, we'll discard the fragment if the X component is 0.
2072 */
2073 static void
2074 emit_discard_opcode(struct svga_shader_emitter_v10 *emit, boolean nonzero)
2075 {
2076 VGPU10OpcodeToken0 opcode0;
2077
2078 opcode0.value = 0;
2079 opcode0.opcodeType = VGPU10_OPCODE_DISCARD;
2080 if (nonzero)
2081 opcode0.testBoolean = VGPU10_INSTRUCTION_TEST_NONZERO;
2082
2083 emit_dword(emit, opcode0.value);
2084 }
2085
2086
2087 /**
2088 * We need to call this before we begin emitting a VGPU10 instruction.
2089 */
2090 static void
2091 begin_emit_instruction(struct svga_shader_emitter_v10 *emit)
2092 {
2093 assert(emit->inst_start_token == 0);
2094 /* Save location of the instruction's VGPU10OpcodeToken0 token.
2095 * Note, we can't save a pointer because it would become invalid if
2096 * we have to realloc the output buffer.
2097 */
2098 emit->inst_start_token = emit_get_num_tokens(emit);
2099 }
2100
2101
2102 /**
2103 * We need to call this after we emit the last token of a VGPU10 instruction.
2104 * This function patches in the opcode token's instructionLength field.
2105 */
2106 static void
2107 end_emit_instruction(struct svga_shader_emitter_v10 *emit)
2108 {
2109 VGPU10OpcodeToken0 *tokens = (VGPU10OpcodeToken0 *) emit->buf;
2110 unsigned inst_length;
2111
2112 assert(emit->inst_start_token > 0);
2113
2114 if (emit->discard_instruction) {
2115 /* Back up the emit->ptr to where this instruction started so
2116 * that we discard the current instruction.
2117 */
2118 emit->ptr = (char *) (tokens + emit->inst_start_token);
2119 }
2120 else {
2121 /* Compute instruction length and patch that into the start of
2122 * the instruction.
2123 */
2124 inst_length = emit_get_num_tokens(emit) - emit->inst_start_token;
2125
2126 assert(inst_length > 0);
2127
2128 tokens[emit->inst_start_token].instructionLength = inst_length;
2129 }
2130
2131 emit->inst_start_token = 0; /* reset to zero for error checking */
2132 emit->discard_instruction = FALSE;
2133 }
2134
2135
2136 /**
2137 * Return index for a free temporary register.
2138 */
2139 static unsigned
2140 get_temp_index(struct svga_shader_emitter_v10 *emit)
2141 {
2142 assert(emit->internal_temp_count < MAX_INTERNAL_TEMPS);
2143 return emit->num_shader_temps + emit->internal_temp_count++;
2144 }
2145
2146
2147 /**
2148 * Release the temporaries which were generated by get_temp_index().
2149 */
2150 static void
2151 free_temp_indexes(struct svga_shader_emitter_v10 *emit)
2152 {
2153 emit->internal_temp_count = 0;
2154 }
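
/* Typical usage (illustrative): allocate scratch registers with
 * get_temp_index() while expanding a single TGSI instruction, then
 * call free_temp_indexes() once that expansion is finished so the
 * same internal temp slots (at most MAX_INTERNAL_TEMPS) can be
 * reused by the next instruction.
 */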
2155
2156
2157 /**
2158 * Create a tgsi_full_src_register.
2159 */
2160 static struct tgsi_full_src_register
2161 make_src_reg(enum tgsi_file_type file, unsigned index)
2162 {
2163 struct tgsi_full_src_register reg;
2164
2165 memset(&reg, 0, sizeof(reg));
2166 reg.Register.File = file;
2167 reg.Register.Index = index;
2168 reg.Register.SwizzleX = TGSI_SWIZZLE_X;
2169 reg.Register.SwizzleY = TGSI_SWIZZLE_Y;
2170 reg.Register.SwizzleZ = TGSI_SWIZZLE_Z;
2171 reg.Register.SwizzleW = TGSI_SWIZZLE_W;
2172 return reg;
2173 }
2174
2175
2176 /**
2177 * Create a tgsi_full_src_register with a swizzle such that all four
2178 * vector components have the same scalar value.
2179 */
2180 static struct tgsi_full_src_register
2181 make_src_scalar_reg(enum tgsi_file_type file, unsigned index, unsigned component)
2182 {
2183 struct tgsi_full_src_register reg;
2184
2185 assert(component >= TGSI_SWIZZLE_X);
2186 assert(component <= TGSI_SWIZZLE_W);
2187
2188 memset(&reg, 0, sizeof(reg));
2189 reg.Register.File = file;
2190 reg.Register.Index = index;
2191 reg.Register.SwizzleX =
2192 reg.Register.SwizzleY =
2193 reg.Register.SwizzleZ =
2194 reg.Register.SwizzleW = component;
2195 return reg;
2196 }
2197
2198
2199 /**
2200 * Create a tgsi_full_src_register for a temporary.
2201 */
2202 static struct tgsi_full_src_register
2203 make_src_temp_reg(unsigned index)
2204 {
2205 return make_src_reg(TGSI_FILE_TEMPORARY, index);
2206 }
2207
2208
2209 /**
2210 * Create a tgsi_full_src_register for a constant.
2211 */
2212 static struct tgsi_full_src_register
2213 make_src_const_reg(unsigned index)
2214 {
2215 return make_src_reg(TGSI_FILE_CONSTANT, index);
2216 }
2217
2218
2219 /**
2220 * Create a tgsi_full_src_register for an immediate constant.
2221 */
2222 static struct tgsi_full_src_register
2223 make_src_immediate_reg(unsigned index)
2224 {
2225 return make_src_reg(TGSI_FILE_IMMEDIATE, index);
2226 }
2227
2228
2229 /**
2230 * Create a tgsi_full_dst_register.
2231 */
2232 static struct tgsi_full_dst_register
2233 make_dst_reg(enum tgsi_file_type file, unsigned index)
2234 {
2235 struct tgsi_full_dst_register reg;
2236
2237 memset(&reg, 0, sizeof(reg));
2238 reg.Register.File = file;
2239 reg.Register.Index = index;
2240 reg.Register.WriteMask = TGSI_WRITEMASK_XYZW;
2241 return reg;
2242 }
2243
2244
2245 /**
2246 * Create a tgsi_full_dst_register for a temporary.
2247 */
2248 static struct tgsi_full_dst_register
2249 make_dst_temp_reg(unsigned index)
2250 {
2251 return make_dst_reg(TGSI_FILE_TEMPORARY, index);
2252 }
2253
2254
2255 /**
2256 * Create a tgsi_full_dst_register for an output.
2257 */
2258 static struct tgsi_full_dst_register
2259 make_dst_output_reg(unsigned index)
2260 {
2261 return make_dst_reg(TGSI_FILE_OUTPUT, index);
2262 }
2263
2264
2265 /**
2266 * Create negated tgsi_full_src_register.
2267 */
2268 static struct tgsi_full_src_register
2269 negate_src(const struct tgsi_full_src_register *reg)
2270 {
2271 struct tgsi_full_src_register neg = *reg;
2272 neg.Register.Negate = !reg->Register.Negate;
2273 return neg;
2274 }
2275
2276 /**
2277 * Create absolute value of a tgsi_full_src_register.
2278 */
2279 static struct tgsi_full_src_register
2280 absolute_src(const struct tgsi_full_src_register *reg)
2281 {
2282 struct tgsi_full_src_register absolute = *reg;
2283 absolute.Register.Absolute = 1;
2284 return absolute;
2285 }
2286
2287
2288 /** Return the named swizzle term from the src register */
2289 static inline unsigned
2290 get_swizzle(const struct tgsi_full_src_register *reg, enum tgsi_swizzle term)
2291 {
2292 switch (term) {
2293 case TGSI_SWIZZLE_X:
2294 return reg->Register.SwizzleX;
2295 case TGSI_SWIZZLE_Y:
2296 return reg->Register.SwizzleY;
2297 case TGSI_SWIZZLE_Z:
2298 return reg->Register.SwizzleZ;
2299 case TGSI_SWIZZLE_W:
2300 return reg->Register.SwizzleW;
2301 default:
2302 assert(!"Bad swizzle");
2303 return TGSI_SWIZZLE_X;
2304 }
2305 }
2306
2307
2308 /**
2309 * Create swizzled tgsi_full_src_register.
2310 */
2311 static struct tgsi_full_src_register
2312 swizzle_src(const struct tgsi_full_src_register *reg,
2313 enum tgsi_swizzle swizzleX, enum tgsi_swizzle swizzleY,
2314 enum tgsi_swizzle swizzleZ, enum tgsi_swizzle swizzleW)
2315 {
2316 struct tgsi_full_src_register swizzled = *reg;
2317 /* Note: we swizzle the current swizzle */
2318 swizzled.Register.SwizzleX = get_swizzle(reg, swizzleX);
2319 swizzled.Register.SwizzleY = get_swizzle(reg, swizzleY);
2320 swizzled.Register.SwizzleZ = get_swizzle(reg, swizzleZ);
2321 swizzled.Register.SwizzleW = get_swizzle(reg, swizzleW);
2322 return swizzled;
2323 }
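
/* For example, if 'reg' has the identity swizzle .xyzw, then
 * swizzle_src(&reg, TGSI_SWIZZLE_W, TGSI_SWIZZLE_Z,
 *             TGSI_SWIZZLE_Y, TGSI_SWIZZLE_X)
 * returns a copy of 'reg' with the swizzle .wzyx.
 */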
2324
2325
2326 /**
2327 * Create swizzled tgsi_full_src_register where all the swizzle
2328 * terms are the same.
2329 */
2330 static struct tgsi_full_src_register
2331 scalar_src(const struct tgsi_full_src_register *reg, enum tgsi_swizzle swizzle)
2332 {
2333 struct tgsi_full_src_register swizzled = *reg;
2334 /* Note: we swizzle the current swizzle */
2335 swizzled.Register.SwizzleX =
2336 swizzled.Register.SwizzleY =
2337 swizzled.Register.SwizzleZ =
2338 swizzled.Register.SwizzleW = get_swizzle(reg, swizzle);
2339 return swizzled;
2340 }
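
/* For example, if 'reg' is TEMP[0].xyzw, then
 * scalar_src(&reg, TGSI_SWIZZLE_Y) returns TEMP[0].yyyy.
 */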
2341
2342
2343 /**
2344 * Create new tgsi_full_dst_register with writemask.
2345 * \param mask bitmask of TGSI_WRITEMASK_[XYZW]
2346 */
2347 static struct tgsi_full_dst_register
2348 writemask_dst(const struct tgsi_full_dst_register *reg, unsigned mask)
2349 {
2350 struct tgsi_full_dst_register masked = *reg;
2351 masked.Register.WriteMask = mask;
2352 return masked;
2353 }
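
/* For example, writemask_dst(&dst, TGSI_WRITEMASK_XY) returns a copy
 * of 'dst' which only writes the X and Y components.
 */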
2354
2355
2356 /**
2357 * Check if the register's swizzle is XXXX, YYYY, ZZZZ, or WWWW.
2358 */
2359 static boolean
2360 same_swizzle_terms(const struct tgsi_full_src_register *reg)
2361 {
2362 return (reg->Register.SwizzleX == reg->Register.SwizzleY &&
2363 reg->Register.SwizzleY == reg->Register.SwizzleZ &&
2364 reg->Register.SwizzleZ == reg->Register.SwizzleW);
2365 }
2366
2367
2368 /**
2369 * Search the vector for the value 'x' and return its position.
2370 */
2371 static int
2372 find_imm_in_vec4(const union tgsi_immediate_data vec[4],
2373 union tgsi_immediate_data x)
2374 {
2375 unsigned i;
2376 for (i = 0; i < 4; i++) {
2377 if (vec[i].Int == x.Int)
2378 return i;
2379 }
2380 return -1;
2381 }
2382
2383
2384 /**
2385 * Helper used by make_immediate_reg(), make_immediate_reg_4().
2386 */
2387 static int
2388 find_immediate(struct svga_shader_emitter_v10 *emit,
2389 union tgsi_immediate_data x, unsigned startIndex)
2390 {
2391 const unsigned endIndex = emit->num_immediates;
2392 unsigned i;
2393
2394 assert(emit->immediates_emitted);
2395
2396 /* Search each immediate vector's components for the value x */
2397 for (i = startIndex; i < endIndex; i++) {
2398 if (x.Int == emit->immediates[i][0].Int ||
2399 x.Int == emit->immediates[i][1].Int ||
2400 x.Int == emit->immediates[i][2].Int ||
2401 x.Int == emit->immediates[i][3].Int) {
2402 return i;
2403 }
2404 }
2405 /* Should never try to use an immediate value that wasn't pre-declared */
2406 assert(!"find_immediate() failed!");
2407 return -1;
2408 }
2409
2410
2411 /**
2412 * As above, but search for a double[2] pair.
2413 */
2414 static int
2415 find_immediate_dbl(struct svga_shader_emitter_v10 *emit,
2416 double x, double y)
2417 {
2418 const unsigned endIndex = emit->num_immediates;
2419 unsigned i;
2420
2421 assert(emit->immediates_emitted);
2422
2423 /* Search the double immediates for the (x, y) pair */
2424 for (i = 0; i < endIndex; i++) {
2425 if (x == emit->immediates_dbl[i][0] &&
2426 y == emit->immediates_dbl[i][1]) {
2427 return i;
2428 }
2429 }
2430 /* Should never try to use an immediate value that wasn't pre-declared */
2431 assert(!"find_immediate_dbl() failed!");
2432 return -1;
2433 }
2434
2435
2436
2437 /**
2438 * Return a tgsi_full_src_register for an immediate/literal
2439 * union tgsi_immediate_data[4] value.
2440 * Note: the values must have been previously declared/allocated in
2441 * emit_pre_helpers(). Also, all of x, y, z, w must be located in the same
2442 * vec4 immediate.
2443 */
2444 static struct tgsi_full_src_register
2445 make_immediate_reg_4(struct svga_shader_emitter_v10 *emit,
2446 const union tgsi_immediate_data imm[4])
2447 {
2448 struct tgsi_full_src_register reg;
2449 unsigned i;
2450
2451 for (i = 0; i < emit->num_common_immediates; i++) {
2452 /* search for first component value */
2453 int immpos = find_immediate(emit, imm[0], i);
2454 int x, y, z, w;
2455
2456 assert(immpos >= 0);
2457
2458 /* find remaining components within the immediate vector */
2459 x = find_imm_in_vec4(emit->immediates[immpos], imm[0]);
2460 y = find_imm_in_vec4(emit->immediates[immpos], imm[1]);
2461 z = find_imm_in_vec4(emit->immediates[immpos], imm[2]);
2462 w = find_imm_in_vec4(emit->immediates[immpos], imm[3]);
2463
2464 if (x >=0 && y >= 0 && z >= 0 && w >= 0) {
2465 /* found them all */
2466 memset(&reg, 0, sizeof(reg));
2467 reg.Register.File = TGSI_FILE_IMMEDIATE;
2468 reg.Register.Index = immpos;
2469 reg.Register.SwizzleX = x;
2470 reg.Register.SwizzleY = y;
2471 reg.Register.SwizzleZ = z;
2472 reg.Register.SwizzleW = w;
2473 return reg;
2474 }
2475 /* else, keep searching */
2476 }
2477
2478 assert(!"Failed to find immediate register!");
2479
2480 /* Just return IMM[0].xxxx */
2481 memset(&reg, 0, sizeof(reg));
2482 reg.Register.File = TGSI_FILE_IMMEDIATE;
2483 return reg;
2484 }
2485
2486
2487 /**
2488 * Return a tgsi_full_src_register for an immediate/literal
2489 * union tgsi_immediate_data value of the form {value, value, value, value}.
2490 * \sa make_immediate_reg_4() regarding allowed values.
2491 */
2492 static struct tgsi_full_src_register
2493 make_immediate_reg(struct svga_shader_emitter_v10 *emit,
2494 union tgsi_immediate_data value)
2495 {
2496 struct tgsi_full_src_register reg;
2497 int immpos = find_immediate(emit, value, 0);
2498
2499 assert(immpos >= 0);
2500
2501 memset(&reg, 0, sizeof(reg));
2502 reg.Register.File = TGSI_FILE_IMMEDIATE;
2503 reg.Register.Index = immpos;
2504 reg.Register.SwizzleX =
2505 reg.Register.SwizzleY =
2506 reg.Register.SwizzleZ =
2507 reg.Register.SwizzleW = find_imm_in_vec4(emit->immediates[immpos], value);
2508
2509 return reg;
2510 }
2511
2512
2513 /**
2514 * Return a tgsi_full_src_register for an immediate/literal float[4] value.
2515 * \sa make_immediate_reg_4() regarding allowed values.
2516 */
2517 static struct tgsi_full_src_register
2518 make_immediate_reg_float4(struct svga_shader_emitter_v10 *emit,
2519 float x, float y, float z, float w)
2520 {
2521 union tgsi_immediate_data imm[4];
2522 imm[0].Float = x;
2523 imm[1].Float = y;
2524 imm[2].Float = z;
2525 imm[3].Float = w;
2526 return make_immediate_reg_4(emit, imm);
2527 }
2528
2529
2530 /**
2531 * Return a tgsi_full_src_register for an immediate/literal float value
2532 * of the form {value, value, value, value}.
2533 * \sa make_immediate_reg_4() regarding allowed values.
2534 */
2535 static struct tgsi_full_src_register
2536 make_immediate_reg_float(struct svga_shader_emitter_v10 *emit, float value)
2537 {
2538 union tgsi_immediate_data imm;
2539 imm.Float = value;
2540 return make_immediate_reg(emit, imm);
2541 }
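
/* For example, make_immediate_reg_float(emit, 0.5f) returns a source
 * register selecting a 0.5f component of one of the pre-declared
 * immediate vec4s (alloc_common_immediates() always allocates
 * {0.0, 1.0, 0.5, -1.0}, so 0.5f is always available).
 */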
2542
2543
2544 /**
2545 * Return a tgsi_full_src_register for an immediate/literal int[4] vector.
2546 */
2547 static struct tgsi_full_src_register
2548 make_immediate_reg_int4(struct svga_shader_emitter_v10 *emit,
2549 int x, int y, int z, int w)
2550 {
2551 union tgsi_immediate_data imm[4];
2552 imm[0].Int = x;
2553 imm[1].Int = y;
2554 imm[2].Int = z;
2555 imm[3].Int = w;
2556 return make_immediate_reg_4(emit, imm);
2557 }
2558
2559
2560 /**
2561 * Return a tgsi_full_src_register for an immediate/literal int value
2562 * of the form {value, value, value, value}.
2563 * \sa make_immediate_reg_4() regarding allowed values.
2564 */
2565 static struct tgsi_full_src_register
2566 make_immediate_reg_int(struct svga_shader_emitter_v10 *emit, int value)
2567 {
2568 union tgsi_immediate_data imm;
2569 imm.Int = value;
2570 return make_immediate_reg(emit, imm);
2571 }
2572
2573
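/**
 * Return a tgsi_full_src_register for an immediate/literal double value
 * of the form {value, value}. The value must have been previously
 * allocated with alloc_immediate_double2().
 */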
2574 static struct tgsi_full_src_register
2575 make_immediate_reg_double(struct svga_shader_emitter_v10 *emit, double value)
2576 {
2577 struct tgsi_full_src_register reg;
2578 int immpos = find_immediate_dbl(emit, value, value);
2579
2580 assert(immpos >= 0);
2581
2582 memset(&reg, 0, sizeof(reg));
2583 reg.Register.File = TGSI_FILE_IMMEDIATE;
2584 reg.Register.Index = immpos;
2585 reg.Register.SwizzleX = TGSI_SWIZZLE_X;
2586 reg.Register.SwizzleY = TGSI_SWIZZLE_Y;
2587 reg.Register.SwizzleZ = TGSI_SWIZZLE_Z;
2588 reg.Register.SwizzleW = TGSI_SWIZZLE_W;
2589
2590 return reg;
2591 }
2592
2593
2594 /**
2595 * Allocate space for a union tgsi_immediate_data[4] immediate.
2596 * \return the index/position of the immediate.
2597 */
2598 static unsigned
2599 alloc_immediate_4(struct svga_shader_emitter_v10 *emit,
2600 const union tgsi_immediate_data imm[4])
2601 {
2602 unsigned n = emit->num_immediates++;
2603 assert(!emit->immediates_emitted);
2604 assert(n < ARRAY_SIZE(emit->immediates));
2605 emit->immediates[n][0] = imm[0];
2606 emit->immediates[n][1] = imm[1];
2607 emit->immediates[n][2] = imm[2];
2608 emit->immediates[n][3] = imm[3];
2609 return n;
2610 }
2611
2612
2613 /**
2614 * Allocate space for a float[4] immediate.
2615 * \return the index/position of the immediate.
2616 */
2617 static unsigned
2618 alloc_immediate_float4(struct svga_shader_emitter_v10 *emit,
2619 float x, float y, float z, float w)
2620 {
2621 union tgsi_immediate_data imm[4];
2622 imm[0].Float = x;
2623 imm[1].Float = y;
2624 imm[2].Float = z;
2625 imm[3].Float = w;
2626 return alloc_immediate_4(emit, imm);
2627 }
2628
2629
2630 /**
2631 * Allocate space for an int[4] immediate.
2632 * \return the index/position of the immediate.
2633 */
2634 static unsigned
2635 alloc_immediate_int4(struct svga_shader_emitter_v10 *emit,
2636 int x, int y, int z, int w)
2637 {
2638 union tgsi_immediate_data imm[4];
2639 imm[0].Int = x;
2640 imm[1].Int = y;
2641 imm[2].Int = z;
2642 imm[3].Int = w;
2643 return alloc_immediate_4(emit, imm);
2644 }
2645
2646
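/**
 * Allocate space for a double[2] immediate.
 * \return the index/position of the immediate.
 */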
2647 static unsigned
2648 alloc_immediate_double2(struct svga_shader_emitter_v10 *emit,
2649 double x, double y)
2650 {
2651 unsigned n = emit->num_immediates++;
2652 assert(!emit->immediates_emitted);
2653 assert(n < ARRAY_SIZE(emit->immediates));
2654 emit->immediates_dbl[n][0] = x;
2655 emit->immediates_dbl[n][1] = y;
2656 return n;
2657
2658 }
2659
2660
2661 /**
2662 * Allocate a shader input to store a system value.
2663 */
2664 static unsigned
2665 alloc_system_value_index(struct svga_shader_emitter_v10 *emit, unsigned index)
2666 {
2667 const unsigned n = emit->linkage.input_map_max + 1 + index;
2668 assert(index < ARRAY_SIZE(emit->system_value_indexes));
2669 emit->system_value_indexes[index] = n;
2670 return n;
2671 }
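
/* For example, if the highest linked input register is 3
 * (input_map_max == 3), system value index 0 is assigned to
 * input register 3 + 1 + 0 = 4.
 */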
2672
2673
2674 /**
2675 * Translate a TGSI immediate value (union tgsi_immediate_data[4]) to VGPU10.
2676 */
2677 static boolean
2678 emit_vgpu10_immediate(struct svga_shader_emitter_v10 *emit,
2679 const struct tgsi_full_immediate *imm)
2680 {
2681 /* We don't actually emit any code here. We just save the
2682 * immediate values and emit them later.
2683 */
2684 alloc_immediate_4(emit, imm->u);
2685 return TRUE;
2686 }
2687
2688
2689 /**
2690 * Emit a VGPU10_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER block
2691 * containing all the immediate values previously allocated
2692 * with alloc_immediate_4().
2693 */
2694 static boolean
2695 emit_vgpu10_immediates_block(struct svga_shader_emitter_v10 *emit)
2696 {
2697 VGPU10OpcodeToken0 token;
2698
2699 assert(!emit->immediates_emitted);
2700
2701 token.value = 0;
2702 token.opcodeType = VGPU10_OPCODE_CUSTOMDATA;
2703 token.customDataClass = VGPU10_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER;
2704
2705 /* Note: no begin/end_emit_instruction() calls */
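/* The block layout is: the CUSTOMDATA opcode token, then the total
 * length in dwords (2 header dwords + 4 dwords per vec4 immediate),
 * then the packed immediate data itself.
 */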
2706 emit_dword(emit, token.value);
2707 emit_dword(emit, 2 + 4 * emit->num_immediates);
2708 emit_dwords(emit, (unsigned *) emit->immediates, 4 * emit->num_immediates);
2709
2710 emit->immediates_emitted = TRUE;
2711
2712 return TRUE;
2713 }
2714
2715
2716 /**
2717 * Translate a fragment shader's TGSI_INTERPOLATE_x mode to a vgpu10
2718 * interpolation mode.
2719 * \return a VGPU10_INTERPOLATION_x value
2720 */
2721 static unsigned
2722 translate_interpolation(const struct svga_shader_emitter_v10 *emit,
2723 enum tgsi_interpolate_mode interp,
2724 enum tgsi_interpolate_loc interpolate_loc)
2725 {
2726 if (interp == TGSI_INTERPOLATE_COLOR) {
2727 interp = emit->key.fs.flatshade ?
2728 TGSI_INTERPOLATE_CONSTANT : TGSI_INTERPOLATE_PERSPECTIVE;
2729 }
2730
2731 switch (interp) {
2732 case TGSI_INTERPOLATE_CONSTANT:
2733 return VGPU10_INTERPOLATION_CONSTANT;
2734 case TGSI_INTERPOLATE_LINEAR:
2735 if (interpolate_loc == TGSI_INTERPOLATE_LOC_CENTROID) {
2736 return VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID;
2737 } else if (interpolate_loc == TGSI_INTERPOLATE_LOC_SAMPLE &&
2738 emit->version >= 41) {
2739 return VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE;
2740 } else {
2741 return VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE;
2742 }
2743 break;
2744 case TGSI_INTERPOLATE_PERSPECTIVE:
2745 if (interpolate_loc == TGSI_INTERPOLATE_LOC_CENTROID) {
2746 return VGPU10_INTERPOLATION_LINEAR_CENTROID;
2747 } else if (interpolate_loc == TGSI_INTERPOLATE_LOC_SAMPLE &&
2748 emit->version >= 41) {
2749 return VGPU10_INTERPOLATION_LINEAR_SAMPLE;
2750 } else {
2751 return VGPU10_INTERPOLATION_LINEAR;
2752 }
2753 break;
2754 default:
2755 assert(!"Unexpected interpolation mode");
2756 return VGPU10_INTERPOLATION_CONSTANT;
2757 }
2758 }
2759
2760
2761 /**
2762 * Translate a TGSI property to VGPU10.
2763 * Don't emit any instructions yet; we only need to gather the primitive property
2764 * information. The output primitive topology might be changed later. The
2765 * final property instructions will be emitted as part of the pre-helper code.
2766 */
2767 static boolean
2768 emit_vgpu10_property(struct svga_shader_emitter_v10 *emit,
2769 const struct tgsi_full_property *prop)
2770 {
2771 static const VGPU10_PRIMITIVE primType[] = {
2772 VGPU10_PRIMITIVE_POINT, /* PIPE_PRIM_POINTS */
2773 VGPU10_PRIMITIVE_LINE, /* PIPE_PRIM_LINES */
2774 VGPU10_PRIMITIVE_LINE, /* PIPE_PRIM_LINE_LOOP */
2775 VGPU10_PRIMITIVE_LINE, /* PIPE_PRIM_LINE_STRIP */
2776 VGPU10_PRIMITIVE_TRIANGLE, /* PIPE_PRIM_TRIANGLES */
2777 VGPU10_PRIMITIVE_TRIANGLE, /* PIPE_PRIM_TRIANGLE_STRIP */
2778 VGPU10_PRIMITIVE_TRIANGLE, /* PIPE_PRIM_TRIANGLE_FAN */
2779 VGPU10_PRIMITIVE_UNDEFINED, /* PIPE_PRIM_QUADS */
2780 VGPU10_PRIMITIVE_UNDEFINED, /* PIPE_PRIM_QUAD_STRIP */
2781 VGPU10_PRIMITIVE_UNDEFINED, /* PIPE_PRIM_POLYGON */
2782 VGPU10_PRIMITIVE_LINE_ADJ, /* PIPE_PRIM_LINES_ADJACENCY */
2783 VGPU10_PRIMITIVE_LINE_ADJ, /* PIPE_PRIM_LINE_STRIP_ADJACENCY */
2784 VGPU10_PRIMITIVE_TRIANGLE_ADJ, /* PIPE_PRIM_TRIANGLES_ADJACENCY */
2785 VGPU10_PRIMITIVE_TRIANGLE_ADJ /* PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY */
2786 };
2787
2788 static const VGPU10_PRIMITIVE_TOPOLOGY primTopology[] = {
2789 VGPU10_PRIMITIVE_TOPOLOGY_POINTLIST, /* PIPE_PRIM_POINTS */
2790 VGPU10_PRIMITIVE_TOPOLOGY_LINELIST, /* PIPE_PRIM_LINES */
2791 VGPU10_PRIMITIVE_TOPOLOGY_LINELIST, /* PIPE_PRIM_LINE_LOOP */
2792 VGPU10_PRIMITIVE_TOPOLOGY_LINESTRIP, /* PIPE_PRIM_LINE_STRIP */
2793 VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLELIST, /* PIPE_PRIM_TRIANGLES */
2794 VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, /* PIPE_PRIM_TRIANGLE_STRIP */
2795 VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, /* PIPE_PRIM_TRIANGLE_FAN */
2796 VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED, /* PIPE_PRIM_QUADS */
2797 VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED, /* PIPE_PRIM_QUAD_STRIP */
2798 VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED, /* PIPE_PRIM_POLYGON */
2799 VGPU10_PRIMITIVE_TOPOLOGY_LINELIST_ADJ, /* PIPE_PRIM_LINES_ADJACENCY */
2800 VGPU10_PRIMITIVE_TOPOLOGY_LINELIST_ADJ, /* PIPE_PRIM_LINE_STRIP_ADJACENCY */
2801 VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ, /* PIPE_PRIM_TRIANGLES_ADJACENCY */
2802 VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ /* PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY */
2803 };
2804
2805 static const unsigned inputArraySize[] = {
2806 0, /* VGPU10_PRIMITIVE_UNDEFINED */
2807 1, /* VGPU10_PRIMITIVE_POINT */
2808 2, /* VGPU10_PRIMITIVE_LINE */
2809 3, /* VGPU10_PRIMITIVE_TRIANGLE */
2810 0,
2811 0,
2812 4, /* VGPU10_PRIMITIVE_LINE_ADJ */
2813 6 /* VGPU10_PRIMITIVE_TRIANGLE_ADJ */
2814 };
2815
2816 switch (prop->Property.PropertyName) {
2817 case TGSI_PROPERTY_GS_INPUT_PRIM:
2818 assert(prop->u[0].Data < ARRAY_SIZE(primType));
2819 emit->gs.prim_type = primType[prop->u[0].Data];
2820 assert(emit->gs.prim_type != VGPU10_PRIMITIVE_UNDEFINED);
2821 emit->gs.input_size = inputArraySize[emit->gs.prim_type];
2822 break;
2823
2824 case TGSI_PROPERTY_GS_OUTPUT_PRIM:
2825 assert(prop->u[0].Data < ARRAY_SIZE(primTopology));
2826 emit->gs.prim_topology = primTopology[prop->u[0].Data];
2827 assert(emit->gs.prim_topology != VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED);
2828 break;
2829
2830 case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES:
2831 emit->gs.max_out_vertices = prop->u[0].Data;
2832 break;
2833
2834 case TGSI_PROPERTY_GS_INVOCATIONS:
2835 emit->gs.invocations = prop->u[0].Data;
2836 break;
2837
2838 case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS:
2839 case TGSI_PROPERTY_NEXT_SHADER:
2840 case TGSI_PROPERTY_NUM_CLIPDIST_ENABLED:
2841 /* no-op */
2842 break;
2843
2844 case TGSI_PROPERTY_TCS_VERTICES_OUT:
2845 /* This info is already captured in the shader key */
2846 break;
2847
2848 case TGSI_PROPERTY_TES_PRIM_MODE:
2849 emit->tes.prim_mode = prop->u[0].Data;
2850 break;
2851
2852 case TGSI_PROPERTY_TES_SPACING:
2853 emit->tes.spacing = prop->u[0].Data;
2854 break;
2855
2856 case TGSI_PROPERTY_TES_VERTEX_ORDER_CW:
2857 emit->tes.vertices_order_cw = prop->u[0].Data;
2858 break;
2859
2860 case TGSI_PROPERTY_TES_POINT_MODE:
2861 emit->tes.point_mode = prop->u[0].Data;
2862 break;
2863
2864 default:
2865 debug_printf("Unexpected TGSI property %s\n",
2866 tgsi_property_names[prop->Property.PropertyName]);
2867 }
2868
2869 return TRUE;
2870 }
2871
2872
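/**
 * A helper to emit a single property declaration instruction,
 * optionally followed by one immediate data dword.
 */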
2873 static void
2874 emit_property_instruction(struct svga_shader_emitter_v10 *emit,
2875 VGPU10OpcodeToken0 opcode0, unsigned nData,
2876 unsigned data)
2877 {
2878 begin_emit_instruction(emit);
2879 emit_dword(emit, opcode0.value);
2880 if (nData)
2881 emit_dword(emit, data);
2882 end_emit_instruction(emit);
2883 }
2884
2885
2886 /**
2887 * Emit property instructions
2888 */
2889 static void
2890 emit_property_instructions(struct svga_shader_emitter_v10 *emit)
2891 {
2892 VGPU10OpcodeToken0 opcode0;
2893
2894 assert(emit->unit == PIPE_SHADER_GEOMETRY);
2895
2896 /* emit input primitive type declaration */
2897 opcode0.value = 0;
2898 opcode0.opcodeType = VGPU10_OPCODE_DCL_GS_INPUT_PRIMITIVE;
2899 opcode0.primitive = emit->gs.prim_type;
2900 emit_property_instruction(emit, opcode0, 0, 0);
2901
2902 /* emit max output vertices */
2903 opcode0.value = 0;
2904 opcode0.opcodeType = VGPU10_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT;
2905 emit_property_instruction(emit, opcode0, 1, emit->gs.max_out_vertices);
2906
2907 if (emit->version >= 50 && emit->gs.invocations > 0) {
2908 opcode0.value = 0;
2909 opcode0.opcodeType = VGPU10_OPCODE_DCL_GS_INSTANCE_COUNT;
2910 emit_property_instruction(emit, opcode0, 1, emit->gs.invocations);
2911 }
2912 }
2913
2914
2915 /**
2916 * A helper function to declare the tessellator domain in a hull shader
2917 * or in a domain shader.
2918 */
2919 static void
2920 emit_tessellator_domain(struct svga_shader_emitter_v10 *emit,
2921 enum pipe_prim_type prim_mode)
2922 {
2923 VGPU10OpcodeToken0 opcode0;
2924
2925 opcode0.value = 0;
2926 opcode0.opcodeType = VGPU10_OPCODE_DCL_TESS_DOMAIN;
2927 switch (prim_mode) {
2928 case PIPE_PRIM_QUADS:
2929 case PIPE_PRIM_LINES:
2930 opcode0.tessDomain = VGPU10_TESSELLATOR_DOMAIN_QUAD;
2931 break;
2932 case PIPE_PRIM_TRIANGLES:
2933 opcode0.tessDomain = VGPU10_TESSELLATOR_DOMAIN_TRI;
2934 break;
2935 default:
2936 debug_printf("Invalid tessellator prim mode %d\n", prim_mode);
2937 opcode0.tessDomain = VGPU10_TESSELLATOR_DOMAIN_UNDEFINED;
2938 }
2939 begin_emit_instruction(emit);
2940 emit_dword(emit, opcode0.value);
2941 end_emit_instruction(emit);
2942 }
2943
2944
2945 /**
2946 * Emit domain shader declarations.
2947 */
2948 static void
2949 emit_domain_shader_declarations(struct svga_shader_emitter_v10 *emit)
2950 {
2951 VGPU10OpcodeToken0 opcode0;
2952
2953 assert(emit->unit == PIPE_SHADER_TESS_EVAL);
2954
2955 /* Emit the input control point count */
2956 assert(emit->key.tes.vertices_per_patch >= 0 &&
2957 emit->key.tes.vertices_per_patch <= 32);
2958
2959 opcode0.value = 0;
2960 opcode0.opcodeType = VGPU10_OPCODE_DCL_INPUT_CONTROL_POINT_COUNT;
2961 opcode0.controlPointCount = emit->key.tes.vertices_per_patch;
2962 begin_emit_instruction(emit);
2963 emit_dword(emit, opcode0.value);
2964 end_emit_instruction(emit);
2965
2966 emit_tessellator_domain(emit, emit->tes.prim_mode);
2967 }
2968
2969
2970 /**
2971 * Some common values like 0.0, 1.0, 0.5, etc. are frequently needed
2972 * to implement some instructions. We pre-allocate those values here
2973 * in the immediate constant buffer.
2974 */
2975 static void
2976 alloc_common_immediates(struct svga_shader_emitter_v10 *emit)
2977 {
2978 unsigned n = 0;
2979
2980 emit->common_immediate_pos[n++] =
2981 alloc_immediate_float4(emit, 0.0f, 1.0f, 0.5f, -1.0f);
2982
2983 if (emit->info.opcode_count[TGSI_OPCODE_LIT] > 0) {
2984 emit->common_immediate_pos[n++] =
2985 alloc_immediate_float4(emit, 128.0f, -128.0f, 0.0f, 0.0f);
2986 }
2987
2988 emit->common_immediate_pos[n++] =
2989 alloc_immediate_int4(emit, 0, 1, 0, -1);
2990
2991 if (emit->info.opcode_count[TGSI_OPCODE_IMSB] > 0 ||
2992 emit->info.opcode_count[TGSI_OPCODE_UMSB] > 0) {
2993 emit->common_immediate_pos[n++] =
2994 alloc_immediate_int4(emit, 31, 0, 0, 0);
2995 }
2996
2997 if (emit->info.opcode_count[TGSI_OPCODE_UBFE] > 0 ||
2998 emit->info.opcode_count[TGSI_OPCODE_IBFE] > 0 ||
2999 emit->info.opcode_count[TGSI_OPCODE_BFI] > 0) {
3000 emit->common_immediate_pos[n++] =
3001 alloc_immediate_int4(emit, 32, 0, 0, 0);
3002 }
3003
3004 if (emit->key.vs.attrib_puint_to_snorm) {
3005 emit->common_immediate_pos[n++] =
3006 alloc_immediate_float4(emit, -2.0f, 2.0f, 3.0f, -1.66666f);
3007 }
3008
3009 if (emit->key.vs.attrib_puint_to_uscaled) {
3010 emit->common_immediate_pos[n++] =
3011 alloc_immediate_float4(emit, 1023.0f, 3.0f, 0.0f, 0.0f);
3012 }
3013
3014 if (emit->key.vs.attrib_puint_to_sscaled) {
3015 emit->common_immediate_pos[n++] =
3016 alloc_immediate_int4(emit, 22, 12, 2, 0);
3017
3018 emit->common_immediate_pos[n++] =
3019 alloc_immediate_int4(emit, 22, 30, 0, 0);
3020 }
3021
3022 if (emit->vposition.num_prescale > 1) {
3023 unsigned i;
3024 for (i = 0; i < emit->vposition.num_prescale; i+=4) {
3025 emit->common_immediate_pos[n++] =
3026 alloc_immediate_int4(emit, i, i+1, i+2, i+3);
3027 }
3028 }
3029
3030 emit->immediates_dbl = (double (*)[2]) emit->immediates;
3031
3032 if (emit->info.opcode_count[TGSI_OPCODE_DNEG] > 0) {
3033 emit->common_immediate_pos[n++] =
3034 alloc_immediate_double2(emit, -1.0, -1.0);
3035 }
3036
3037 if (emit->info.opcode_count[TGSI_OPCODE_DSQRT] > 0) {
3038 emit->common_immediate_pos[n++] =
3039 alloc_immediate_double2(emit, 0.0, 0.0);
3040 emit->common_immediate_pos[n++] =
3041 alloc_immediate_double2(emit, 1.0, 1.0);
3042 }
3043
3044 if (emit->info.opcode_count[TGSI_OPCODE_INTERP_OFFSET] > 0) {
3045 emit->common_immediate_pos[n++] =
3046 alloc_immediate_float4(emit, 16.0f, -16.0f, 0.0, 0.0);
3047 }
3048
3049 assert(n <= ARRAY_SIZE(emit->common_immediate_pos));
3050
3051 unsigned i;
3052
3053 for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
3054 if (emit->key.tex[i].texel_bias) {
3055 /* Replace the 0.0f placeholders if more immediate float values are needed */
3056 emit->common_immediate_pos[n++] =
3057 alloc_immediate_float4(emit, 0.0001f, 0.0f, 0.0f, 0.0f);
3058 break;
3059 }
3060 }
3061
3062 assert(n <= ARRAY_SIZE(emit->common_immediate_pos));
3063 emit->num_common_immediates = n;
3064 }
3065
3066
3067 /**
3068 * Emit hull shader declarations.
3069 */
3070 static void
3071 emit_hull_shader_declarations(struct svga_shader_emitter_v10 *emit)
3072 {
3073 VGPU10OpcodeToken0 opcode0;
3074
3075 /* Emit the input control point count */
3076 assert(emit->key.tcs.vertices_per_patch > 0 &&
3077 emit->key.tcs.vertices_per_patch <= 32);
3078
3079 opcode0.value = 0;
3080 opcode0.opcodeType = VGPU10_OPCODE_DCL_INPUT_CONTROL_POINT_COUNT;
3081 opcode0.controlPointCount = emit->key.tcs.vertices_per_patch;
3082 begin_emit_instruction(emit);
3083 emit_dword(emit, opcode0.value);
3084 end_emit_instruction(emit);
3085
3086 /* Emit the output control point count */
3087 assert(emit->key.tcs.vertices_out >= 0 && emit->key.tcs.vertices_out <= 32);
3088
3089 opcode0.value = 0;
3090 opcode0.opcodeType = VGPU10_OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT;
3091 opcode0.controlPointCount = emit->key.tcs.vertices_out;
3092 begin_emit_instruction(emit);
3093 emit_dword(emit, opcode0.value);
3094 end_emit_instruction(emit);
3095
3096 /* Emit tessellator domain */
3097 emit_tessellator_domain(emit, emit->key.tcs.prim_mode);
3098
3099 /* Emit tessellator output primitive */
3100 opcode0.value = 0;
3101 opcode0.opcodeType = VGPU10_OPCODE_DCL_TESS_OUTPUT_PRIMITIVE;
3102 if (emit->key.tcs.point_mode) {
3103 opcode0.tessOutputPrimitive = VGPU10_TESSELLATOR_OUTPUT_POINT;
3104 }
3105 else if (emit->key.tcs.prim_mode == PIPE_PRIM_LINES) {
3106 opcode0.tessOutputPrimitive = VGPU10_TESSELLATOR_OUTPUT_LINE;
3107 }
3108 else {
3109 assert(emit->key.tcs.prim_mode == PIPE_PRIM_QUADS ||
3110 emit->key.tcs.prim_mode == PIPE_PRIM_TRIANGLES);
3111
3112 if (emit->key.tcs.vertices_order_cw)
3113 opcode0.tessOutputPrimitive = VGPU10_TESSELLATOR_OUTPUT_TRIANGLE_CCW;
3114 else
3115 opcode0.tessOutputPrimitive = VGPU10_TESSELLATOR_OUTPUT_TRIANGLE_CW;
3116 }
3117 begin_emit_instruction(emit);
3118 emit_dword(emit, opcode0.value);
3119 end_emit_instruction(emit);
3120
3121 /* Emit tessellator partitioning */
3122 opcode0.value = 0;
3123 opcode0.opcodeType = VGPU10_OPCODE_DCL_TESS_PARTITIONING;
3124 switch (emit->key.tcs.spacing) {
3125 case PIPE_TESS_SPACING_FRACTIONAL_ODD:
3126 opcode0.tessPartitioning = VGPU10_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD;
3127 break;
3128 case PIPE_TESS_SPACING_FRACTIONAL_EVEN:
3129 opcode0.tessPartitioning = VGPU10_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN;
3130 break;
3131 case PIPE_TESS_SPACING_EQUAL:
3132 opcode0.tessPartitioning = VGPU10_TESSELLATOR_PARTITIONING_INTEGER;
3133 break;
3134 default:
3135 debug_printf("invalid tessellator spacing %d\n", emit->key.tcs.spacing);
3136 opcode0.tessPartitioning = VGPU10_TESSELLATOR_PARTITIONING_UNDEFINED;
3137 }
3138 begin_emit_instruction(emit);
3139 emit_dword(emit, opcode0.value);
3140 end_emit_instruction(emit);
3141
3142 /* Declare constant registers */
3143 emit_constant_declaration(emit);
3144
3145 /* Declare samplers and resources */
3146 emit_sampler_declarations(emit);
3147 emit_resource_declarations(emit);
3148
3149 alloc_common_immediates(emit);
3150
3151 int nVertices = emit->key.tcs.vertices_per_patch;
3152 emit->tcs.imm_index =
3153 alloc_immediate_int4(emit, nVertices, nVertices, nVertices, 0);
3154
3155 /* Now, emit the constant block containing all the immediates
3156 * declared by the shader, as well as the extra ones seen above.
3157 */
3158 emit_vgpu10_immediates_block(emit);
3159
3160 }
3161
3162
3163 /**
3164 * A helper function to determine if the control point phase is needed.
3165 * Returns TRUE if there is control point output.
3166 */
3167 static boolean
3168 needs_control_point_phase(struct svga_shader_emitter_v10 *emit)
3169 {
3170 unsigned i;
3171
3172 assert(emit->unit == PIPE_SHADER_TESS_CTRL);
3173
3174 /* If output control point count does not match the input count,
3175 * we need a control point phase to explicitly set the output control
3176 * points.
3177 */
3178 if ((emit->key.tcs.vertices_per_patch != emit->key.tcs.vertices_out) &&
3179 emit->key.tcs.vertices_out)
3180 return TRUE;
3181
3182 for (i = 0; i < emit->info.num_outputs; i++) {
3183 switch (emit->info.output_semantic_name[i]) {
3184 case TGSI_SEMANTIC_PATCH:
3185 case TGSI_SEMANTIC_TESSOUTER:
3186 case TGSI_SEMANTIC_TESSINNER:
3187 break;
3188 default:
3189 return TRUE;
3190 }
3191 }
3192 return FALSE;
3193 }
3194
3195
3196 /**
3197 * A helper function to add the shader signature for the passthrough control
3198 * point phase. The HLSL compiler also generates this signature for the
3199 * passthrough control point phase, and it is needed by the Metal renderer.
3200 */
3201 static void
3202 emit_passthrough_control_point_signature(struct svga_shader_emitter_v10 *emit)
3203 {
3204 struct svga_shader_signature *sgn = &emit->signature;
3205 SVGA3dDXShaderSignatureEntry *sgnEntry;
3206 unsigned i;
3207
3208 for (i = 0; i < emit->info.num_inputs; i++) {
3209 unsigned index = emit->linkage.input_map[i];
3210 enum tgsi_semantic sem_name = emit->info.input_semantic_name[i];
3211
3212 sgnEntry = &sgn->inputs[sgn->header.numInputSignatures++];
3213
3214 set_shader_signature_entry(sgnEntry, index,
3215 tgsi_semantic_to_sgn_name[sem_name],
3216 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
3217 SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
3218 SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
3219
3220 sgnEntry = &sgn->outputs[sgn->header.numOutputSignatures++];
3221
3222 set_shader_signature_entry(sgnEntry, i,
3223 tgsi_semantic_to_sgn_name[sem_name],
3224 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
3225 SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
3226 SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
3227 }
3228 }
3229
3230
3231 /**
3232 * A helper function to emit an instruction to start the control point phase
3233 * in the hull shader.
3234 */
3235 static void
3236 emit_control_point_phase_instruction(struct svga_shader_emitter_v10 *emit)
3237 {
3238 VGPU10OpcodeToken0 opcode0;
3239
3240 opcode0.value = 0;
3241 opcode0.opcodeType = VGPU10_OPCODE_HS_CONTROL_POINT_PHASE;
3242 begin_emit_instruction(emit);
3243 emit_dword(emit, opcode0.value);
3244 end_emit_instruction(emit);
3245 }
3246
3247
3248 /**
3249 * Start the hull shader control point phase
3250 */
3251 static boolean
3252 emit_hull_shader_control_point_phase(struct svga_shader_emitter_v10 *emit)
3253 {
3254 /* If there is no control point output, skip the control point phase. */
3255 if (!needs_control_point_phase(emit)) {
3256 if (!emit->key.tcs.vertices_out) {
3257 /**
3258 * If the tcs does not explicitly generate any control point output
3259 * and the tes does not use any input control point, then
3260 * emit an empty control point phase with zero output control
3261 * point count.
3262 */
3263 emit_control_point_phase_instruction(emit);
3264
3265 /**
3266 * Since this is an empty control point phase, we will need to
3267 * add input signatures when we parse the tcs again in the
3268 * patch constant phase.
3269 */
3270 emit->tcs.fork_phase_add_signature = TRUE;
3271 }
3272 else {
3273 /**
3274 * Before skipping the control point phase, add the signature for
3275 * the passthrough control point.
3276 */
3277 emit_passthrough_control_point_signature(emit);
3278 }
3279 return FALSE;
3280 }
3281
3282 /* Start the control point phase in the hull shader */
3283 emit_control_point_phase_instruction(emit);
3284
3285 /* Declare the output control point ID */
3286 if (emit->tcs.invocation_id_sys_index == INVALID_INDEX) {
3287 /* Add invocation id declaration if it does not exist */
3288 emit->tcs.invocation_id_sys_index = emit->info.num_system_values + 1;
3289 }
3290
3291 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
3292 VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID,
3293 VGPU10_OPERAND_INDEX_0D,
3294 0, 1,
3295 VGPU10_NAME_UNDEFINED,
3296 VGPU10_OPERAND_0_COMPONENT, 0,
3297 0,
3298 VGPU10_INTERPOLATION_CONSTANT, TRUE,
3299 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
3300
3301 if (emit->tcs.prim_id_index != INVALID_INDEX) {
3302 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
3303 VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID,
3304 VGPU10_OPERAND_INDEX_0D,
3305 0, 1,
3306 VGPU10_NAME_UNDEFINED,
3307 VGPU10_OPERAND_0_COMPONENT,
3308 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
3309 0,
3310 VGPU10_INTERPOLATION_UNDEFINED, TRUE,
3311 SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID);
3312 }
3313
3314 return TRUE;
3315 }
3316
3317
3318 /**
3319 * Start the hull shader patch constant phase and
3320 * do the second pass of the tcs translation and emit
3321 * the relevant declarations and instructions for this phase.
3322 */
3323 static boolean
3324 emit_hull_shader_patch_constant_phase(struct svga_shader_emitter_v10 *emit,
3325 struct tgsi_parse_context *parse)
3326 {
3327 unsigned inst_number = 0;
3328 boolean ret = TRUE;
3329 VGPU10OpcodeToken0 opcode0;
3330
3331 emit->skip_instruction = FALSE;
3332
3333 /* Start the patch constant phase */
3334 opcode0.value = 0;
3335 opcode0.opcodeType = VGPU10_OPCODE_HS_FORK_PHASE;
3336 begin_emit_instruction(emit);
3337 emit_dword(emit, opcode0.value);
3338 end_emit_instruction(emit);
3339
3340 /* Set the current phase to patch constant phase */
3341 emit->tcs.control_point_phase = FALSE;
3342
3343 if (emit->tcs.prim_id_index != INVALID_INDEX) {
3344 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
3345 VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID,
3346 VGPU10_OPERAND_INDEX_0D,
3347 0, 1,
3348 VGPU10_NAME_UNDEFINED,
3349 VGPU10_OPERAND_0_COMPONENT,
3350 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
3351 0,
3352 VGPU10_INTERPOLATION_UNDEFINED, TRUE,
3353 SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID);
3354 }
3355
3356 /* Emit declarations for this phase */
3357 emit->index_range.required =
3358 emit->info.indirect_files & (1 << TGSI_FILE_INPUT) ? TRUE : FALSE;
3359 emit_tcs_input_declarations(emit);
3360
3361 if (emit->index_range.start_index != INVALID_INDEX) {
3362 emit_index_range_declaration(emit);
3363 }
3364
3365 emit->index_range.required =
3366 emit->info.indirect_files & (1 << TGSI_FILE_OUTPUT) ? TRUE : FALSE;
3367 emit_tcs_output_declarations(emit);
3368
3369 if (emit->index_range.start_index != INVALID_INDEX) {
3370 emit_index_range_declaration(emit);
3371 }
3372 emit->index_range.required = FALSE;
3373
3374 emit_temporaries_declaration(emit);
3375
3376 /* Reset the token position to the first instruction token
3377 * in preparation for the second pass of the shader
3378 */
3379 parse->Position = emit->tcs.instruction_token_pos;
3380
3381 while (!tgsi_parse_end_of_tokens(parse)) {
3382 tgsi_parse_token(parse);
3383
3384 assert(parse->FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION);
3385 ret = emit_vgpu10_instruction(emit, inst_number++,
3386 &parse->FullToken.FullInstruction);
3387
3388 /* Usually this applies to the TCS only. If the shader reads a patch
3389 * constant output in the fork phase, we must re-emit all instructions
3390 * that write to patch constant outputs in the fork phase so that the
3391 * results are also stored in temporaries.
3392 */
3393 if (emit->reemit_instruction) {
3394 assert(emit->unit == PIPE_SHADER_TESS_CTRL);
3395 ret = emit_vgpu10_instruction(emit, inst_number,
3396 &parse->FullToken.FullInstruction);
3397 }
3398
3399 if (!ret)
3400 return FALSE;
3401 }
3402
3403 return TRUE;
3404 }
3405
3406
3407 /**
3408 * Emit index range declaration.
3409 */
3410 static boolean
3411 emit_index_range_declaration(struct svga_shader_emitter_v10 *emit)
3412 {
3413 if (emit->version < 50)
3414 return TRUE;
3415
3416 assert(emit->index_range.start_index != INVALID_INDEX);
3417 assert(emit->index_range.count != 0);
3418 assert(emit->index_range.required);
3419 assert(emit->index_range.operandType != VGPU10_NUM_OPERANDS);
3420 assert(emit->index_range.dim != 0);
3421 assert(emit->index_range.size != 0);
3422
3423 VGPU10OpcodeToken0 opcode0;
3424 VGPU10OperandToken0 operand0;
3425
3426 opcode0.value = 0;
3427 opcode0.opcodeType = VGPU10_OPCODE_DCL_INDEX_RANGE;
3428
3429 operand0.value = 0;
3430 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
3431 operand0.indexDimension = emit->index_range.dim;
3432 operand0.operandType = emit->index_range.operandType;
3433 operand0.mask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
3434 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
3435
3436 if (emit->index_range.dim == VGPU10_OPERAND_INDEX_2D)
3437 operand0.index1Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
3438
3439 begin_emit_instruction(emit);
3440 emit_dword(emit, opcode0.value);
3441 emit_dword(emit, operand0.value);
3442
3443 if (emit->index_range.dim == VGPU10_OPERAND_INDEX_2D) {
3444 emit_dword(emit, emit->index_range.size);
3445 emit_dword(emit, emit->index_range.start_index);
3446 emit_dword(emit, emit->index_range.count);
3447 }
3448 else {
3449 emit_dword(emit, emit->index_range.start_index);
3450 emit_dword(emit, emit->index_range.count);
3451 }
3452
3453 end_emit_instruction(emit);
3454
3455 /* Reset the fields in the emit->index_range struct, except
3456 * emit->index_range.required, which will be reset afterwards.
3457 */
3458 emit->index_range.count = 0;
3459 emit->index_range.operandType = VGPU10_NUM_OPERANDS;
3460 emit->index_range.start_index = INVALID_INDEX;
3461 emit->index_range.size = 0;
3462 emit->index_range.dim = 0;
3463
3464 return TRUE;
3465 }
3466
3467
3468 /**
3469 * Emit a vgpu10 declaration "instruction".
3470 * \param index the register index
3471 * \param size array size of the operand. In most cases, it is 1,
3472 * but for inputs to geometry shader, the array size varies
3473 * depending on the primitive type.
3474 */
3475 static void
3476 emit_decl_instruction(struct svga_shader_emitter_v10 *emit,
3477 VGPU10OpcodeToken0 opcode0,
3478 VGPU10OperandToken0 operand0,
3479 VGPU10NameToken name_token,
3480 unsigned index, unsigned size)
3481 {
3482 assert(opcode0.opcodeType);
3483 assert(operand0.mask ||
3484 (operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT) ||
3485 (operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT_DEPTH) ||
3486 (operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK) ||
3487 (operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID) ||
3488 (operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID) ||
3489 (operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID) ||
3490 (operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_COVERAGE_MASK) ||
3491 (operand0.operandType == VGPU10_OPERAND_TYPE_STREAM));
3492
3493 begin_emit_instruction(emit);
3494 emit_dword(emit, opcode0.value);
3495
3496 emit_dword(emit, operand0.value);
3497
3498 if (operand0.indexDimension == VGPU10_OPERAND_INDEX_1D) {
3499 /* Next token is the index of the register to declare */
3500 emit_dword(emit, index);
3501 }
3502 else if (operand0.indexDimension >= VGPU10_OPERAND_INDEX_2D) {
3503 /* Next token is the size of the register */
3504 emit_dword(emit, size);
3505
3506 /* Followed by the index of the register */
3507 emit_dword(emit, index);
3508 }
3509
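   /* System-value declarations (DCL_*_SIV/_SGV) carry an extra name token */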
3510 if (name_token.value) {
3511 emit_dword(emit, name_token.value);
3512 }
3513
3514 end_emit_instruction(emit);
3515 }
3516
3517
3518 /**
3519 * Emit the declaration for a shader input.
3520 * \param opcodeType opcode type, one of VGPU10_OPCODE_DCL_INPUTx
3521 * \param operandType operand type, one of VGPU10_OPERAND_TYPE_INPUT_x
3522 * \param dim index dimension
3523 * \param index the input register index
3524 * \param size array size of the operand. In most cases, it is 1,
3525 * but for inputs to geometry shader, the array size varies
3526 * depending on the primitive type. For tessellation control
3527 * shader, the array size is the vertex count per patch.
3528 * \param name one of VGPU10_NAME_x
3529  * \param numComp number of components
3530 * \param selMode component selection mode
3531 * \param usageMask bitfield of VGPU10_OPERAND_4_COMPONENT_MASK_x values
3532 * \param interpMode interpolation mode
3533 */
3534 static void
3535 emit_input_declaration(struct svga_shader_emitter_v10 *emit,
3536 VGPU10_OPCODE_TYPE opcodeType,
3537 VGPU10_OPERAND_TYPE operandType,
3538 VGPU10_OPERAND_INDEX_DIMENSION dim,
3539 unsigned index, unsigned size,
3540 VGPU10_SYSTEM_NAME name,
3541 VGPU10_OPERAND_NUM_COMPONENTS numComp,
3542 VGPU10_OPERAND_4_COMPONENT_SELECTION_MODE selMode,
3543 unsigned usageMask,
3544 VGPU10_INTERPOLATION_MODE interpMode,
3545 boolean addSignature,
3546 SVGA3dDXSignatureSemanticName sgnName)
3547 {
3548 VGPU10OpcodeToken0 opcode0;
3549 VGPU10OperandToken0 operand0;
3550 VGPU10NameToken name_token;
3551
3552 assert(usageMask <= VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
3553 assert(opcodeType == VGPU10_OPCODE_DCL_INPUT ||
3554 opcodeType == VGPU10_OPCODE_DCL_INPUT_SIV ||
3555 opcodeType == VGPU10_OPCODE_DCL_INPUT_SGV ||
3556 opcodeType == VGPU10_OPCODE_DCL_INPUT_PS ||
3557 opcodeType == VGPU10_OPCODE_DCL_INPUT_PS_SIV ||
3558 opcodeType == VGPU10_OPCODE_DCL_INPUT_PS_SGV);
3559 assert(operandType == VGPU10_OPERAND_TYPE_INPUT ||
3560 operandType == VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID ||
3561 operandType == VGPU10_OPERAND_TYPE_INPUT_COVERAGE_MASK ||
3562 operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID ||
3563 operandType == VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID ||
3564 operandType == VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT ||
3565 operandType == VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT ||
3566 operandType == VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT ||
3567 operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_ID ||
3568 operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_GROUP_ID ||
3569 operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP);
3570
3571 assert(numComp <= VGPU10_OPERAND_4_COMPONENT);
3572 assert(selMode <= VGPU10_OPERAND_4_COMPONENT_MASK_MODE);
3573 assert(dim <= VGPU10_OPERAND_INDEX_3D);
3574 assert(name == VGPU10_NAME_UNDEFINED ||
3575 name == VGPU10_NAME_POSITION ||
3576 name == VGPU10_NAME_INSTANCE_ID ||
3577 name == VGPU10_NAME_VERTEX_ID ||
3578 name == VGPU10_NAME_PRIMITIVE_ID ||
3579 name == VGPU10_NAME_IS_FRONT_FACE ||
3580 name == VGPU10_NAME_SAMPLE_INDEX ||
3581 name == VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX ||
3582 name == VGPU10_NAME_VIEWPORT_ARRAY_INDEX);
3583
3584 assert(interpMode == VGPU10_INTERPOLATION_UNDEFINED ||
3585 interpMode == VGPU10_INTERPOLATION_CONSTANT ||
3586 interpMode == VGPU10_INTERPOLATION_LINEAR ||
3587 interpMode == VGPU10_INTERPOLATION_LINEAR_CENTROID ||
3588 interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE ||
3589 interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID ||
3590 interpMode == VGPU10_INTERPOLATION_LINEAR_SAMPLE ||
3591 interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE);
3592
3593 check_register_index(emit, opcodeType, index);
3594
3595 opcode0.value = operand0.value = name_token.value = 0;
3596
3597 opcode0.opcodeType = opcodeType;
3598 opcode0.interpolationMode = interpMode;
3599
3600 operand0.operandType = operandType;
3601 operand0.numComponents = numComp;
3602 operand0.selectionMode = selMode;
3603 operand0.mask = usageMask;
3604 operand0.indexDimension = dim;
3605 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
3606 if (dim == VGPU10_OPERAND_INDEX_2D)
3607 operand0.index1Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
3608
3609 name_token.name = name;
3610
3611 emit_decl_instruction(emit, opcode0, operand0, name_token, index, size);
3612
3613 if (addSignature) {
3614 struct svga_shader_signature *sgn = &emit->signature;
3615 if (operandType == VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT) {
3616 /* Set patch constant signature */
3617 SVGA3dDXShaderSignatureEntry *sgnEntry =
3618 &sgn->patchConstants[sgn->header.numPatchConstantSignatures++];
3619 set_shader_signature_entry(sgnEntry, index,
3620 sgnName, usageMask,
3621 SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
3622 SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
3623
3624 } else if (operandType == VGPU10_OPERAND_TYPE_INPUT ||
3625 operandType == VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT) {
3626 /* Set input signature */
3627 SVGA3dDXShaderSignatureEntry *sgnEntry =
3628 &sgn->inputs[sgn->header.numInputSignatures++];
3629 set_shader_signature_entry(sgnEntry, index,
3630 sgnName, usageMask,
3631 SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
3632 SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
3633 }
3634 }
3635
3636 if (emit->index_range.required) {
3637 /* Here, index_range declaration is only applicable for opcodeType
3638 * VGPU10_OPCODE_DCL_INPUT and VGPU10_OPCODE_DCL_INPUT_PS and
3639 * for operandType VGPU10_OPERAND_TYPE_INPUT,
3640 * VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT and
3641 * VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT.
3642 */
3643 if ((opcodeType != VGPU10_OPCODE_DCL_INPUT &&
3644 opcodeType != VGPU10_OPCODE_DCL_INPUT_PS) ||
3645 (operandType != VGPU10_OPERAND_TYPE_INPUT &&
3646 operandType != VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT &&
3647 operandType != VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT)) {
3648 if (emit->index_range.start_index != INVALID_INDEX) {
3649 emit_index_range_declaration(emit);
3650 }
3651 return;
3652 }
3653
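      /* Coalesce contiguous declarations of the same operandType into a
       * single index range: start a new range, extend the current one, or
       * flush it and start over when continuity is broken.
       */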
3654 if (emit->index_range.operandType == VGPU10_NUM_OPERANDS) {
3655          /* Need to record a new index_range */
3656 emit->index_range.count = 1;
3657 emit->index_range.operandType = operandType;
3658 emit->index_range.start_index = index;
3659 emit->index_range.size = size;
3660 emit->index_range.dim = dim;
3661 }
3662 else if (index !=
3663 (emit->index_range.start_index + emit->index_range.count) ||
3664 emit->index_range.operandType != operandType) {
3665          /* The input index is not contiguous with the current index range,
3666           * or the operandType differs from the range's operandType. Emit
3667           * the current index_range first, then start recording the next one.
3668           */
3669 emit_index_range_declaration(emit);
3670
3671 emit->index_range.count = 1;
3672 emit->index_range.operandType = operandType;
3673 emit->index_range.start_index = index;
3674 emit->index_range.size = size;
3675 emit->index_range.dim = dim;
3676 }
3677 else if (emit->index_range.operandType == operandType) {
3678          /* The input index is contiguous with the index range and the
3679           * operandType matches, so just increment the index range count.
3680           */
3681 emit->index_range.count++;
3682 }
3683 }
3684 }
3685
3686
3687 /**
3688 * Emit the declaration for a shader output.
3689 * \param type one of VGPU10_OPCODE_DCL_OUTPUTx
3690 * \param index the output register index
3691 * \param name one of VGPU10_NAME_x
3692 * \param usageMask bitfield of VGPU10_OPERAND_4_COMPONENT_MASK_x values
3693 */
3694 static void
3695 emit_output_declaration(struct svga_shader_emitter_v10 *emit,
3696 VGPU10_OPCODE_TYPE type, unsigned index,
3697 VGPU10_SYSTEM_NAME name,
3698 unsigned writemask,
3699 boolean addSignature,
3700 SVGA3dDXSignatureSemanticName sgnName)
3701 {
3702 VGPU10OpcodeToken0 opcode0;
3703 VGPU10OperandToken0 operand0;
3704 VGPU10NameToken name_token;
3705
3706 assert(writemask <= VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
3707 assert(type == VGPU10_OPCODE_DCL_OUTPUT ||
3708 type == VGPU10_OPCODE_DCL_OUTPUT_SGV ||
3709 type == VGPU10_OPCODE_DCL_OUTPUT_SIV);
3710 assert(name == VGPU10_NAME_UNDEFINED ||
3711 name == VGPU10_NAME_POSITION ||
3712 name == VGPU10_NAME_PRIMITIVE_ID ||
3713 name == VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX ||
3714 name == VGPU10_NAME_VIEWPORT_ARRAY_INDEX ||
3715 name == VGPU10_NAME_CLIP_DISTANCE);
3716
3717 check_register_index(emit, type, index);
3718
3719 opcode0.value = operand0.value = name_token.value = 0;
3720
3721 opcode0.opcodeType = type;
3722 operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT;
3723 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
3724 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
3725 operand0.mask = writemask;
3726 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
3727 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
3728
3729 name_token.name = name;
3730
3731 emit_decl_instruction(emit, opcode0, operand0, name_token, index, 1);
3732
3733 /* Capture output signature */
3734 if (addSignature) {
3735 struct svga_shader_signature *sgn = &emit->signature;
3736 SVGA3dDXShaderSignatureEntry *sgnEntry =
3737 &sgn->outputs[sgn->header.numOutputSignatures++];
3738 set_shader_signature_entry(sgnEntry, index,
3739 sgnName, writemask,
3740 SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
3741 SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
3742 }
3743
3744 if (emit->index_range.required) {
3745 /* Here, index_range declaration is only applicable for opcodeType
3746 * VGPU10_OPCODE_DCL_OUTPUT and for operandType
3747 * VGPU10_OPERAND_TYPE_OUTPUT.
3748 */
3749 if (type != VGPU10_OPCODE_DCL_OUTPUT) {
3750 if (emit->index_range.start_index != INVALID_INDEX) {
3751 emit_index_range_declaration(emit);
3752 }
3753 return;
3754 }
3755
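      /* Same coalescing logic as for inputs: start, extend, or flush and
       * restart the current output index range.
       */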
3756 if (emit->index_range.operandType == VGPU10_NUM_OPERANDS) {
3757          /* Need to record a new index_range */
3758 emit->index_range.count = 1;
3759 emit->index_range.operandType = VGPU10_OPERAND_TYPE_OUTPUT;
3760 emit->index_range.start_index = index;
3761 emit->index_range.size = 1;
3762 emit->index_range.dim = VGPU10_OPERAND_INDEX_1D;
3763 }
3764 else if (index !=
3765 (emit->index_range.start_index + emit->index_range.count)) {
3766          /* The output index is not contiguous with the current index range.
3767           * Emit the current index_range first, then start recording the
3768           * next one.
3769           */
3770 emit_index_range_declaration(emit);
3771
3772 emit->index_range.count = 1;
3773 emit->index_range.operandType = VGPU10_OPERAND_TYPE_OUTPUT;
3774 emit->index_range.start_index = index;
3775 emit->index_range.size = 1;
3776 emit->index_range.dim = VGPU10_OPERAND_INDEX_1D;
3777 }
3778 else {
3779          /* The output index is contiguous with the index range, so just
3780           * increment the index range count.
3781           */
3782 emit->index_range.count++;
3783 }
3784 }
3785 }
3786
3787
3788 /**
3789 * Emit the declaration for the fragment depth output.
3790 */
3791 static void
3792 emit_fragdepth_output_declaration(struct svga_shader_emitter_v10 *emit)
3793 {
3794 VGPU10OpcodeToken0 opcode0;
3795 VGPU10OperandToken0 operand0;
3796 VGPU10NameToken name_token;
3797
3798 assert(emit->unit == PIPE_SHADER_FRAGMENT);
3799
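   /* oDepth is a scalar register with no index (0-D, single component) */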
3800 opcode0.value = operand0.value = name_token.value = 0;
3801
3802 opcode0.opcodeType = VGPU10_OPCODE_DCL_OUTPUT;
3803 operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_DEPTH;
3804 operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
3805 operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
3806 operand0.mask = 0;
3807
3808 emit_decl_instruction(emit, opcode0, operand0, name_token, 0, 1);
3809 }
3810
3811
3812 /**
3813 * Emit the declaration for the fragment sample mask/coverage output.
3814 */
3815 static void
3816 emit_samplemask_output_declaration(struct svga_shader_emitter_v10 *emit)
3817 {
3818 VGPU10OpcodeToken0 opcode0;
3819 VGPU10OperandToken0 operand0;
3820 VGPU10NameToken name_token;
3821
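   /* The coverage mask output (oMask) requires SM4.1 or later */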
3822 assert(emit->unit == PIPE_SHADER_FRAGMENT);
3823 assert(emit->version >= 41);
3824
3825 opcode0.value = operand0.value = name_token.value = 0;
3826
3827 opcode0.opcodeType = VGPU10_OPCODE_DCL_OUTPUT;
3828 operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK;
3829 operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
3830 operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
3831 operand0.mask = 0;
3832
3833 emit_decl_instruction(emit, opcode0, operand0, name_token, 0, 1);
3834 }
3835
3836
3837 /**
3838 * Emit output declarations for fragment shader.
3839 */
3840 static void
3841 emit_fs_output_declarations(struct svga_shader_emitter_v10 *emit)
3842 {
3843 unsigned int i;
3844
3845 for (i = 0; i < emit->info.num_outputs; i++) {
3846 /*const unsigned usage_mask = emit->info.output_usage_mask[i];*/
3847 const enum tgsi_semantic semantic_name =
3848 emit->info.output_semantic_name[i];
3849 const unsigned semantic_index = emit->info.output_semantic_index[i];
3850 unsigned index = i;
3851
3852 if (semantic_name == TGSI_SEMANTIC_COLOR) {
3853 assert(semantic_index < ARRAY_SIZE(emit->fs.color_out_index));
3854
3855 emit->fs.color_out_index[semantic_index] = index;
3856
3857 emit->fs.num_color_outputs = MAX2(emit->fs.num_color_outputs,
3858 index + 1);
3859
3860 /* The semantic index is the shader's color output/buffer index */
3861 emit_output_declaration(emit,
3862 VGPU10_OPCODE_DCL_OUTPUT, semantic_index,
3863 VGPU10_NAME_UNDEFINED,
3864 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
3865 TRUE,
3866 map_tgsi_semantic_to_sgn_name(semantic_name));
3867
3868 if (semantic_index == 0) {
3869 if (emit->key.fs.write_color0_to_n_cbufs > 1) {
3870 /* Emit declarations for the additional color outputs
3871 * for broadcasting.
3872 */
3873 unsigned j;
3874 for (j = 1; j < emit->key.fs.write_color0_to_n_cbufs; j++) {
3875 /* Allocate a new output index */
3876 unsigned idx = emit->info.num_outputs + j - 1;
3877 emit->fs.color_out_index[j] = idx;
3878 emit_output_declaration(emit,
3879 VGPU10_OPCODE_DCL_OUTPUT, idx,
3880 VGPU10_NAME_UNDEFINED,
3881 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
3882 TRUE,
3883 map_tgsi_semantic_to_sgn_name(semantic_name));
3884 emit->info.output_semantic_index[idx] = j;
3885 }
3886
3887 emit->fs.num_color_outputs =
3888 emit->key.fs.write_color0_to_n_cbufs;
3889 }
3890 }
3891 }
3892 else if (semantic_name == TGSI_SEMANTIC_POSITION) {
3893 /* Fragment depth output */
3894 emit_fragdepth_output_declaration(emit);
3895 }
3896 else if (semantic_name == TGSI_SEMANTIC_SAMPLEMASK) {
3897 /* Sample mask output */
3898 emit_samplemask_output_declaration(emit);
3899 }
3900 else {
3901 assert(!"Bad output semantic name");
3902 }
3903 }
3904 }
3905
3906
3907 /**
3908 * Emit common output declaration for vertex processing.
3909 */
3910 static void
3911 emit_vertex_output_declaration(struct svga_shader_emitter_v10 *emit,
3912 unsigned index, unsigned writemask,
3913 boolean addSignature)
3914 {
3915 const enum tgsi_semantic semantic_name =
3916 emit->info.output_semantic_name[index];
3917 const unsigned semantic_index = emit->info.output_semantic_index[index];
3918 unsigned name, type;
3919 unsigned final_mask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
3920
3921 assert(emit->unit != PIPE_SHADER_FRAGMENT &&
3922 emit->unit != PIPE_SHADER_COMPUTE);
3923
3924 switch (semantic_name) {
3925 case TGSI_SEMANTIC_POSITION:
3926 if (emit->unit == PIPE_SHADER_TESS_CTRL) {
3927 /* position will be declared in control point only */
3928 assert(emit->tcs.control_point_phase);
3929 type = VGPU10_OPCODE_DCL_OUTPUT;
3930 name = VGPU10_NAME_UNDEFINED;
3931 emit_output_declaration(emit, type, index, name, final_mask, TRUE,
3932 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
3933 return;
3934 }
3935 else {
3936 type = VGPU10_OPCODE_DCL_OUTPUT_SIV;
3937 name = VGPU10_NAME_POSITION;
3938 }
3939 /* Save the index of the vertex position output register */
3940 emit->vposition.out_index = index;
3941 break;
3942 case TGSI_SEMANTIC_CLIPDIST:
3943 type = VGPU10_OPCODE_DCL_OUTPUT_SIV;
3944 name = VGPU10_NAME_CLIP_DISTANCE;
3945 /* save the starting index of the clip distance output register */
3946 if (semantic_index == 0)
3947 emit->clip_dist_out_index = index;
3948 final_mask = apply_clip_plane_mask(emit, writemask, semantic_index);
3949 if (final_mask == 0x0)
3950 return; /* discard this do-nothing declaration */
3951 break;
3952 case TGSI_SEMANTIC_CLIPVERTEX:
3953 type = VGPU10_OPCODE_DCL_OUTPUT;
3954 name = VGPU10_NAME_UNDEFINED;
3955 emit->clip_vertex_out_index = index;
3956 break;
3957 default:
3958 /* generic output */
3959 type = VGPU10_OPCODE_DCL_OUTPUT;
3960 name = VGPU10_NAME_UNDEFINED;
3961 }
3962
3963 emit_output_declaration(emit, type, index, name, final_mask, addSignature,
3964 map_tgsi_semantic_to_sgn_name(semantic_name));
3965 }
3966
3967
3968 /**
3969 * Emit declaration for outputs in vertex shader.
3970 */
3971 static void
3972 emit_vs_output_declarations(struct svga_shader_emitter_v10 *emit)
3973 {
3974 unsigned i;
3975 for (i = 0; i < emit->info.num_outputs; i++) {
3976 emit_vertex_output_declaration(emit, i, emit->output_usage_mask[i], TRUE);
3977 }
3978 }
3979
3980
3981 /**
3982 * A helper function to determine the writemask for an output
3983 * for the specified stream.
3984 */
3985 static unsigned
3986 output_writemask_for_stream(unsigned stream, ubyte output_streams,
3987 ubyte output_usagemask)
3988 {
3989 unsigned i;
3990 unsigned writemask = 0;
3991
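   /* Each component's stream ID occupies two bits of output_streams:
    * component i lives in bits [2i+1:2i].  For example, output_streams =
    * 0x1B (binary 00 01 10 11) puts x in stream 3, y in stream 2,
    * z in stream 1 and w in stream 0.
    */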
3992 for (i = 0; i < 4; i++) {
3993 if ((output_streams & 0x3) == stream)
3994 writemask |= (VGPU10_OPERAND_4_COMPONENT_MASK_X << i);
3995 output_streams >>= 2;
3996 }
3997 return writemask & output_usagemask;
3998 }
3999
4000
4001 /**
4002 * Emit declaration for outputs in geometry shader.
4003 */
4004 static void
4005 emit_gs_output_declarations(struct svga_shader_emitter_v10 *emit)
4006 {
4007 unsigned i;
4008 VGPU10OpcodeToken0 opcode0;
4009 unsigned numStreamsSupported = 1;
4010 int s;
4011
4012 if (emit->version >= 50) {
4013 numStreamsSupported = ARRAY_SIZE(emit->info.num_stream_output_components);
4014 }
4015
4016    /**
4017     * Emit the streams in reverse order so that we finish with
4018     * stream 0; any auxiliary output declarations will then go
4019     * to stream 0.
4020     */
4021 for (s = numStreamsSupported-1; s >= 0; s--) {
4022
4023 if (emit->info.num_stream_output_components[s] == 0)
4024 continue;
4025
4026 if (emit->version >= 50) {
4027 /* DCL_STREAM stream */
4028 begin_emit_instruction(emit);
4029 emit_opcode(emit, VGPU10_OPCODE_DCL_STREAM, FALSE);
4030 emit_stream_register(emit, s);
4031 end_emit_instruction(emit);
4032 }
4033
4034 /* emit output primitive topology declaration */
4035 opcode0.value = 0;
4036 opcode0.opcodeType = VGPU10_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY;
4037 opcode0.primitiveTopology = emit->gs.prim_topology;
4038 emit_property_instruction(emit, opcode0, 0, 0);
4039
4040 for (i = 0; i < emit->info.num_outputs; i++) {
4041 unsigned writemask;
4042
4043 /* find out the writemask for this stream */
4044 writemask = output_writemask_for_stream(s, emit->info.output_streams[i],
4045 emit->output_usage_mask[i]);
4046
4047 if (writemask) {
4048 enum tgsi_semantic semantic_name =
4049 emit->info.output_semantic_name[i];
4050
4051 /* TODO: Still need to take care of a special case where a
4052 * single varying spans across multiple output registers.
4053 */
4054 switch(semantic_name) {
4055 case TGSI_SEMANTIC_PRIMID:
4056 emit_output_declaration(emit,
4057 VGPU10_OPCODE_DCL_OUTPUT_SGV, i,
4058 VGPU10_NAME_PRIMITIVE_ID,
4059 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4060 FALSE,
4061 map_tgsi_semantic_to_sgn_name(semantic_name));
4062 break;
4063 case TGSI_SEMANTIC_LAYER:
4064 emit_output_declaration(emit,
4065 VGPU10_OPCODE_DCL_OUTPUT_SIV, i,
4066 VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX,
4067 VGPU10_OPERAND_4_COMPONENT_MASK_X,
4068 FALSE,
4069 map_tgsi_semantic_to_sgn_name(semantic_name));
4070 break;
4071 case TGSI_SEMANTIC_VIEWPORT_INDEX:
4072 emit_output_declaration(emit,
4073 VGPU10_OPCODE_DCL_OUTPUT_SIV, i,
4074 VGPU10_NAME_VIEWPORT_ARRAY_INDEX,
4075 VGPU10_OPERAND_4_COMPONENT_MASK_X,
4076 FALSE,
4077 map_tgsi_semantic_to_sgn_name(semantic_name));
4078 emit->gs.viewport_index_out_index = i;
4079 break;
4080 default:
4081 emit_vertex_output_declaration(emit, i, writemask, FALSE);
4082 }
4083 }
4084 }
4085 }
4086
4087 /* For geometry shader outputs, it is possible the same register is
4088 * declared multiple times for different streams. So to avoid
4089 * redundant signature entries, geometry shader output signature is done
4090 * outside of the declaration.
4091 */
4092 struct svga_shader_signature *sgn = &emit->signature;
4093 SVGA3dDXShaderSignatureEntry *sgnEntry;
4094
4095 for (i = 0; i < emit->info.num_outputs; i++) {
4096 if (emit->output_usage_mask[i]) {
4097 enum tgsi_semantic sem_name = emit->info.output_semantic_name[i];
4098
4099 sgnEntry = &sgn->outputs[sgn->header.numOutputSignatures++];
4100 set_shader_signature_entry(sgnEntry, i,
4101 map_tgsi_semantic_to_sgn_name(sem_name),
4102 emit->output_usage_mask[i],
4103 SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
4104 SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
4105 }
4106 }
4107 }
4108
4109
4110 /**
4111 * Emit the declaration for the tess inner/outer output.
4112 * \param opcodeType either VGPU10_OPCODE_DCL_OUTPUT_SIV or _INPUT_SIV
4113 * \param operandType either VGPU10_OPERAND_TYPE_OUTPUT or _INPUT
4114 * \param name VGPU10_NAME_FINAL_*_TESSFACTOR value
4115 */
4116 static void
4117 emit_tesslevel_declaration(struct svga_shader_emitter_v10 *emit,
4118 unsigned index, unsigned opcodeType,
4119 unsigned operandType, VGPU10_SYSTEM_NAME name,
4120 SVGA3dDXSignatureSemanticName sgnName)
4121 {
4122 VGPU10OpcodeToken0 opcode0;
4123 VGPU10OperandToken0 operand0;
4124 VGPU10NameToken name_token;
4125
4126 assert(emit->version >= 50);
4127 assert(name >= VGPU10_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR ||
4128 (emit->key.tcs.prim_mode == PIPE_PRIM_LINES &&
4129 name == VGPU10_NAME_UNDEFINED));
4130 assert(name <= VGPU10_NAME_FINAL_LINE_DENSITY_TESSFACTOR);
4131
4132 assert(operandType == VGPU10_OPERAND_TYPE_OUTPUT ||
4133 operandType == VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT);
4134
4135 opcode0.value = operand0.value = name_token.value = 0;
4136
4137 opcode0.opcodeType = opcodeType;
4138 operand0.operandType = operandType;
4139 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
4140 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
4141 operand0.mask = VGPU10_OPERAND_4_COMPONENT_MASK_X;
4142 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
4143 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
4144
4145 name_token.name = name;
4146 emit_decl_instruction(emit, opcode0, operand0, name_token, index, 1);
4147
4148 /* Capture patch constant signature */
4149 struct svga_shader_signature *sgn = &emit->signature;
4150 SVGA3dDXShaderSignatureEntry *sgnEntry =
4151 &sgn->patchConstants[sgn->header.numPatchConstantSignatures++];
4152 set_shader_signature_entry(sgnEntry, index,
4153 sgnName, VGPU10_OPERAND_4_COMPONENT_MASK_X,
4154 SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
4155 SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
4156 }
4157
4158
4159 /**
4160 * Emit output declarations for tessellation control shader.
4161 */
4162 static void
4163 emit_tcs_output_declarations(struct svga_shader_emitter_v10 *emit)
4164 {
4165 unsigned int i;
4166 unsigned outputIndex = emit->num_outputs;
4167 struct svga_shader_signature *sgn = &emit->signature;
4168
4169    /**
4170     * Reset patch_generic_out_count so it is not counted twice, since
4171     * this function is called twice: once for the control point phase
4172     * and once for the patch constant phase.
4173     */
4174 emit->tcs.patch_generic_out_count = 0;
4175
4176 for (i = 0; i < emit->info.num_outputs; i++) {
4177 unsigned index = i;
4178 const enum tgsi_semantic semantic_name =
4179 emit->info.output_semantic_name[i];
4180
4181 switch (semantic_name) {
4182 case TGSI_SEMANTIC_TESSINNER:
4183 emit->tcs.inner.tgsi_index = i;
4184
4185 /* skip per-patch output declarations in control point phase */
4186 if (emit->tcs.control_point_phase)
4187 break;
4188
4189 emit->tcs.inner.out_index = outputIndex;
4190 switch (emit->key.tcs.prim_mode) {
4191 case PIPE_PRIM_QUADS:
4192 emit_tesslevel_declaration(emit, outputIndex++,
4193 VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4194 VGPU10_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR,
4195 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR);
4196
4197 emit_tesslevel_declaration(emit, outputIndex++,
4198 VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4199 VGPU10_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR,
4200 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR);
4201 break;
4202 case PIPE_PRIM_TRIANGLES:
4203 emit_tesslevel_declaration(emit, outputIndex++,
4204 VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4205 VGPU10_NAME_FINAL_TRI_INSIDE_TESSFACTOR,
4206 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_INSIDE_TESSFACTOR);
4207 break;
4208 case PIPE_PRIM_LINES:
4209 break;
4210 default:
4211 debug_printf("Unsupported primitive type");
4212 }
4213 break;
4214
4215 case TGSI_SEMANTIC_TESSOUTER:
4216 emit->tcs.outer.tgsi_index = i;
4217
4218 /* skip per-patch output declarations in control point phase */
4219 if (emit->tcs.control_point_phase)
4220 break;
4221
4222 emit->tcs.outer.out_index = outputIndex;
4223 switch (emit->key.tcs.prim_mode) {
4224 case PIPE_PRIM_QUADS:
4225 for (int j = 0; j < 4; j++) {
4226 emit_tesslevel_declaration(emit, outputIndex++,
4227 VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4228 VGPU10_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + j,
4229 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + j);
4230 }
4231 break;
4232 case PIPE_PRIM_TRIANGLES:
4233 for (int j = 0; j < 3; j++) {
4234 emit_tesslevel_declaration(emit, outputIndex++,
4235 VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4236 VGPU10_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + j,
4237 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + j);
4238 }
4239 break;
4240 case PIPE_PRIM_LINES:
4241 for (int j = 0; j < 2; j++) {
4242 emit_tesslevel_declaration(emit, outputIndex++,
4243 VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4244 VGPU10_NAME_FINAL_LINE_DETAIL_TESSFACTOR + j,
4245 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DETAIL_TESSFACTOR + j);
4246 }
4247 break;
4248 default:
4249 debug_printf("Unsupported primitive type");
4250 }
4251 break;
4252
4253 case TGSI_SEMANTIC_PATCH:
4254 if (emit->tcs.patch_generic_out_index == INVALID_INDEX)
4255             emit->tcs.patch_generic_out_index = i;
4256 emit->tcs.patch_generic_out_count++;
4257
4258 /* skip per-patch output declarations in control point phase */
4259 if (emit->tcs.control_point_phase)
4260 break;
4261
4262 emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT, index,
4263 VGPU10_NAME_UNDEFINED,
4264 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4265 FALSE,
4266 map_tgsi_semantic_to_sgn_name(semantic_name));
4267
4268 SVGA3dDXShaderSignatureEntry *sgnEntry =
4269 &sgn->patchConstants[sgn->header.numPatchConstantSignatures++];
4270 set_shader_signature_entry(sgnEntry, index,
4271 map_tgsi_semantic_to_sgn_name(semantic_name),
4272 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4273 SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
4274 SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
4275
4276 break;
4277
4278 default:
4279 /* save the starting index of control point outputs */
4280 if (emit->tcs.control_point_out_index == INVALID_INDEX)
4281 emit->tcs.control_point_out_index = i;
4282 emit->tcs.control_point_out_count++;
4283
4284 /* skip control point output declarations in patch constant phase */
4285 if (!emit->tcs.control_point_phase)
4286 break;
4287
4288 emit_vertex_output_declaration(emit, i, emit->output_usage_mask[i],
4289 TRUE);
4290
4291 }
4292 }
4293
4294 if (emit->tcs.control_point_phase) {
4295 /**
4296 * Add missing control point output in control point phase.
4297 */
4298 if (emit->tcs.control_point_out_index == INVALID_INDEX) {
4299 /* use register index after tessellation factors */
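         /* quads: 4 outer + 2 inner = 6 factors, triangles: 3 outer +
          * 1 inner = 4 factors, isolines: 2 outer = 2 factors.
          */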
4300 switch (emit->key.tcs.prim_mode) {
4301 case PIPE_PRIM_QUADS:
4302 emit->tcs.control_point_out_index = outputIndex + 6;
4303 break;
4304 case PIPE_PRIM_TRIANGLES:
4305 emit->tcs.control_point_out_index = outputIndex + 4;
4306 break;
4307 default:
4308 emit->tcs.control_point_out_index = outputIndex + 2;
4309 break;
4310 }
4311 emit->tcs.control_point_out_count++;
4312 emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV,
4313 emit->tcs.control_point_out_index,
4314 VGPU10_NAME_POSITION,
4315 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4316 TRUE,
4317 SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION);
4318
4319          /* If the TCS does not write any control point outputs, we can
4320           * end the hull shader control point phase here after emitting
4321           * the default control point output.
4322           */
4323 emit->skip_instruction = TRUE;
4324 }
4325 }
4326 else {
4327 if (emit->tcs.outer.out_index == INVALID_INDEX) {
4328       /* Since the TCS did not declare the outer tess level output
4329        * registers, declare them here for the patch constant phase only.
4330        */
4331 emit->tcs.outer.out_index = outputIndex;
4332 if (emit->key.tcs.prim_mode == PIPE_PRIM_QUADS) {
4333 for (int i = 0; i < 4; i++) {
4334 emit_tesslevel_declaration(emit, outputIndex++,
4335 VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4336 VGPU10_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + i,
4337 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + i);
4338 }
4339 }
4340 else if (emit->key.tcs.prim_mode == PIPE_PRIM_TRIANGLES) {
4341 for (int i = 0; i < 3; i++) {
4342 emit_tesslevel_declaration(emit, outputIndex++,
4343 VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4344 VGPU10_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + i,
4345 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + i);
4346 }
4347 }
4348 }
4349
4350 if (emit->tcs.inner.out_index == INVALID_INDEX) {
4351       /* Since the TCS did not declare the inner tess level output
4352        * registers, declare them here.
4353        */
4354 emit->tcs.inner.out_index = outputIndex;
4355 if (emit->key.tcs.prim_mode == PIPE_PRIM_QUADS) {
4356 emit_tesslevel_declaration(emit, outputIndex++,
4357 VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4358 VGPU10_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR,
4359 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR);
4360 emit_tesslevel_declaration(emit, outputIndex++,
4361 VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4362 VGPU10_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR,
4363 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR);
4364 }
4365 else if (emit->key.tcs.prim_mode == PIPE_PRIM_TRIANGLES) {
4366 emit_tesslevel_declaration(emit, outputIndex++,
4367 VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4368 VGPU10_NAME_FINAL_TRI_INSIDE_TESSFACTOR,
4369 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_INSIDE_TESSFACTOR);
4370 }
4371 }
4372 }
4373 emit->num_outputs = outputIndex;
4374 }
4375
4376
4377 /**
4378 * Emit output declarations for tessellation evaluation shader.
4379 */
4380 static void
4381 emit_tes_output_declarations(struct svga_shader_emitter_v10 *emit)
4382 {
4383 unsigned int i;
4384
4385 for (i = 0; i < emit->info.num_outputs; i++) {
4386 emit_vertex_output_declaration(emit, i, emit->output_usage_mask[i], TRUE);
4387 }
4388 }
4389
4390
4391 /**
4392 * Emit the declaration for a system value input/output.
4393 */
4394 static void
4395 emit_system_value_declaration(struct svga_shader_emitter_v10 *emit,
4396 enum tgsi_semantic semantic_name, unsigned index)
4397 {
4398 switch (semantic_name) {
4399 case TGSI_SEMANTIC_INSTANCEID:
4400 index = alloc_system_value_index(emit, index);
4401 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_SIV,
4402 VGPU10_OPERAND_TYPE_INPUT,
4403 VGPU10_OPERAND_INDEX_1D,
4404 index, 1,
4405 VGPU10_NAME_INSTANCE_ID,
4406 VGPU10_OPERAND_4_COMPONENT,
4407 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4408 VGPU10_OPERAND_4_COMPONENT_MASK_X,
4409 VGPU10_INTERPOLATION_UNDEFINED, TRUE,
4410 map_tgsi_semantic_to_sgn_name(semantic_name));
4411 break;
4412 case TGSI_SEMANTIC_VERTEXID:
4413 emit->vs.vertex_id_sys_index = index;
4414 index = alloc_system_value_index(emit, index);
4415 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_SIV,
4416 VGPU10_OPERAND_TYPE_INPUT,
4417 VGPU10_OPERAND_INDEX_1D,
4418 index, 1,
4419 VGPU10_NAME_VERTEX_ID,
4420 VGPU10_OPERAND_4_COMPONENT,
4421 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4422 VGPU10_OPERAND_4_COMPONENT_MASK_X,
4423 VGPU10_INTERPOLATION_UNDEFINED, TRUE,
4424 map_tgsi_semantic_to_sgn_name(semantic_name));
4425 break;
4426 case TGSI_SEMANTIC_SAMPLEID:
4427 assert(emit->unit == PIPE_SHADER_FRAGMENT);
4428 emit->fs.sample_id_sys_index = index;
4429 index = alloc_system_value_index(emit, index);
4430 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_PS_SIV,
4431 VGPU10_OPERAND_TYPE_INPUT,
4432 VGPU10_OPERAND_INDEX_1D,
4433 index, 1,
4434 VGPU10_NAME_SAMPLE_INDEX,
4435 VGPU10_OPERAND_4_COMPONENT,
4436 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4437 VGPU10_OPERAND_4_COMPONENT_MASK_X,
4438 VGPU10_INTERPOLATION_CONSTANT, TRUE,
4439 map_tgsi_semantic_to_sgn_name(semantic_name));
4440 break;
4441 case TGSI_SEMANTIC_SAMPLEPOS:
4442 /* This system value contains the position of the current sample
4443 * when using per-sample shading. We implement this by calling
4444 * the VGPU10_OPCODE_SAMPLE_POS instruction with the current sample
4445 * index as the argument. See emit_sample_position_instructions().
4446 */
4447 assert(emit->version >= 41);
4448 emit->fs.sample_pos_sys_index = index;
4449 index = alloc_system_value_index(emit, index);
4450 break;
4451 case TGSI_SEMANTIC_INVOCATIONID:
4452       /* Note: the invocation id input is mapped to a different register
4453        * depending on the shader type. In a GS it is mapped to
4454        * vGSInstanceID#; in a TCS it is mapped to vOutputControlPointID#.
4455        * In both cases the mapped name is unique rather than a generic
4456        * input name ("v#"), so there is no need to remap the index
4457        * value.
4458        */
4459 assert(emit->unit == PIPE_SHADER_GEOMETRY ||
4460 emit->unit == PIPE_SHADER_TESS_CTRL);
4461 assert(emit->version >= 50);
4462
4463 if (emit->unit == PIPE_SHADER_GEOMETRY) {
4464 emit->gs.invocation_id_sys_index = index;
4465 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
4466 VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID,
4467 VGPU10_OPERAND_INDEX_0D,
4468 index, 1,
4469 VGPU10_NAME_UNDEFINED,
4470 VGPU10_OPERAND_0_COMPONENT,
4471 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4472 0,
4473 VGPU10_INTERPOLATION_UNDEFINED, TRUE,
4474 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
4475 } else if (emit->unit == PIPE_SHADER_TESS_CTRL) {
4476 /* The emission of the control point id will be done
4477 * in the control point phase in emit_hull_shader_control_point_phase().
4478 */
4479 emit->tcs.invocation_id_sys_index = index;
4480 }
4481 break;
4482 case TGSI_SEMANTIC_SAMPLEMASK:
4483 /* Note: the PS sample mask input has a unique name ("vCoverage#")
4484 * rather than just a generic input name ("v#") so no need to remap the
4485 * index value.
4486 */
4487 assert(emit->unit == PIPE_SHADER_FRAGMENT);
4488 assert(emit->version >= 50);
4489 emit->fs.sample_mask_in_sys_index = index;
4490 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
4491 VGPU10_OPERAND_TYPE_INPUT_COVERAGE_MASK,
4492 VGPU10_OPERAND_INDEX_0D,
4493 index, 1,
4494 VGPU10_NAME_UNDEFINED,
4495 VGPU10_OPERAND_1_COMPONENT,
4496 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4497 0,
4498 VGPU10_INTERPOLATION_CONSTANT, TRUE,
4499 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
4500 break;
4501 case TGSI_SEMANTIC_TESSCOORD:
4502 assert(emit->version >= 50);
4503
4504 unsigned usageMask = 0;
4505
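      /* The domain point has three components (barycentric coordinates)
       * for triangles and two (u, v) for quads and isolines.
       */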
4506 if (emit->tes.prim_mode == PIPE_PRIM_TRIANGLES) {
4507 usageMask = VGPU10_OPERAND_4_COMPONENT_MASK_XYZ;
4508 }
4509 else if (emit->tes.prim_mode == PIPE_PRIM_LINES ||
4510 emit->tes.prim_mode == PIPE_PRIM_QUADS) {
4511 usageMask = VGPU10_OPERAND_4_COMPONENT_MASK_XY;
4512 }
4513
4514 emit->tes.tesscoord_sys_index = index;
4515 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
4516 VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT,
4517 VGPU10_OPERAND_INDEX_0D,
4518 index, 1,
4519 VGPU10_NAME_UNDEFINED,
4520 VGPU10_OPERAND_4_COMPONENT,
4521 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4522 usageMask,
4523 VGPU10_INTERPOLATION_UNDEFINED, TRUE,
4524 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
4525 break;
4526 case TGSI_SEMANTIC_TESSINNER:
4527 assert(emit->version >= 50);
4528 emit->tes.inner.tgsi_index = index;
4529 break;
4530 case TGSI_SEMANTIC_TESSOUTER:
4531 assert(emit->version >= 50);
4532 emit->tes.outer.tgsi_index = index;
4533 break;
4534 case TGSI_SEMANTIC_VERTICESIN:
4535 assert(emit->unit == PIPE_SHADER_TESS_CTRL);
4536 assert(emit->version >= 50);
4537
4538 /* save the system value index */
4539 emit->tcs.vertices_per_patch_index = index;
4540 break;
4541 case TGSI_SEMANTIC_PRIMID:
4542 assert(emit->version >= 50);
4543 if (emit->unit == PIPE_SHADER_TESS_CTRL) {
4544 emit->tcs.prim_id_index = index;
4545 }
4546 else if (emit->unit == PIPE_SHADER_TESS_EVAL) {
4547 emit->tes.prim_id_index = index;
4548 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
4549 VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID,
4550 VGPU10_OPERAND_INDEX_0D,
4551 index, 1,
4552 VGPU10_NAME_UNDEFINED,
4553 VGPU10_OPERAND_0_COMPONENT,
4554 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4555 0,
4556 VGPU10_INTERPOLATION_UNDEFINED, TRUE,
4557 map_tgsi_semantic_to_sgn_name(semantic_name));
4558 }
4559 break;
4560 default:
4561       debug_printf("unexpected system value semantic %u / %s\n",
4562                    semantic_name, tgsi_semantic_names[semantic_name]);
4563 }
4564 }
4565
4566 /**
4567 * Translate a TGSI declaration to VGPU10.
4568 */
4569 static boolean
4570 emit_vgpu10_declaration(struct svga_shader_emitter_v10 *emit,
4571 const struct tgsi_full_declaration *decl)
4572 {
4573 switch (decl->Declaration.File) {
4574 case TGSI_FILE_INPUT:
4575 /* do nothing - see emit_input_declarations() */
4576 return TRUE;
4577
4578 case TGSI_FILE_OUTPUT:
4579 assert(decl->Range.First == decl->Range.Last);
4580 emit->output_usage_mask[decl->Range.First] = decl->Declaration.UsageMask;
4581 return TRUE;
4582
4583 case TGSI_FILE_TEMPORARY:
4584 /* Don't declare the temps here. Just keep track of how many
4585 * and emit the declaration later.
4586 */
4587 if (decl->Declaration.Array) {
4588 /* Indexed temporary array. Save the start index of the array
4589 * and the size of the array.
4590 */
4591 const unsigned arrayID = MIN2(decl->Array.ArrayID, MAX_TEMP_ARRAYS);
4592 assert(arrayID < ARRAY_SIZE(emit->temp_arrays));
4593
4594 /* Save this array so we can emit the declaration for it later */
4595 create_temp_array(emit, arrayID, decl->Range.First,
4596 decl->Range.Last - decl->Range.First + 1,
4597 decl->Range.First);
4598 }
4599
4600 /* for all temps, indexed or not, keep track of highest index */
4601 emit->num_shader_temps = MAX2(emit->num_shader_temps,
4602 decl->Range.Last + 1);
4603 return TRUE;
4604
4605 case TGSI_FILE_CONSTANT:
4606 /* Don't declare constants here. Just keep track and emit later. */
4607 {
4608 unsigned constbuf = 0, num_consts;
4609 if (decl->Declaration.Dimension) {
4610 constbuf = decl->Dim.Index2D;
4611 }
4612          /* Assert here because a shader with an out-of-bounds constbuf
4613           * index should never have linked in the first place, so we
4614           * should not reach this point.
4615           */
4616 assert(constbuf < ARRAY_SIZE(emit->num_shader_consts));
4617
4618 num_consts = MAX2(emit->num_shader_consts[constbuf],
4619 decl->Range.Last + 1);
4620
4621 if (num_consts > VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT) {
4622 debug_printf("Warning: constant buffer is declared to size [%u]"
4623 " but [%u] is the limit.\n",
4624 num_consts,
4625 VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT);
4626 }
4627 /* The linker doesn't enforce the max UBO size so we clamp here */
4628 emit->num_shader_consts[constbuf] =
4629 MIN2(num_consts, VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT);
4630 }
4631 return TRUE;
4632
4633 case TGSI_FILE_IMMEDIATE:
4634 assert(!"TGSI_FILE_IMMEDIATE not handled yet!");
4635 return FALSE;
4636
4637 case TGSI_FILE_SYSTEM_VALUE:
4638 emit_system_value_declaration(emit, decl->Semantic.Name,
4639 decl->Range.First);
4640 return TRUE;
4641
4642 case TGSI_FILE_SAMPLER:
4643 /* Don't declare samplers here. Just keep track and emit later. */
4644 emit->num_samplers = MAX2(emit->num_samplers, decl->Range.Last + 1);
4645 return TRUE;
4646
4647 #if 0
4648 case TGSI_FILE_RESOURCE:
4649 /*opcode0.opcodeType = VGPU10_OPCODE_DCL_RESOURCE;*/
4650 /* XXX more, VGPU10_RETURN_TYPE_FLOAT */
4651 assert(!"TGSI_FILE_RESOURCE not handled yet");
4652 return FALSE;
4653 #endif
4654
4655 case TGSI_FILE_ADDRESS:
4656 emit->num_address_regs = MAX2(emit->num_address_regs,
4657 decl->Range.Last + 1);
4658 return TRUE;
4659
4660 case TGSI_FILE_SAMPLER_VIEW:
4661 {
4662 unsigned unit = decl->Range.First;
4663 assert(decl->Range.First == decl->Range.Last);
4664 emit->sampler_target[unit] = decl->SamplerView.Resource;
4665 /* Note: we can ignore YZW return types for now */
4666 emit->sampler_return_type[unit] = decl->SamplerView.ReturnTypeX;
4667 emit->sampler_view[unit] = TRUE;
4668 }
4669 return TRUE;
4670
4671 default:
4672 assert(!"Unexpected type of declaration");
4673 return FALSE;
4674 }
4675 }
4676
4677
4678
4679 /**
4680 * Emit input declarations for fragment shader.
4681 */
4682 static void
4683 emit_fs_input_declarations(struct svga_shader_emitter_v10 *emit)
4684 {
4685 unsigned i;
4686
4687 for (i = 0; i < emit->linkage.num_inputs; i++) {
4688 enum tgsi_semantic semantic_name = emit->info.input_semantic_name[i];
4689 unsigned usage_mask = emit->info.input_usage_mask[i];
4690 unsigned index = emit->linkage.input_map[i];
4691 unsigned type, interpolationMode, name;
4692 unsigned mask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
4693
4694 if (usage_mask == 0)
4695 continue; /* register is not actually used */
4696
4697 if (semantic_name == TGSI_SEMANTIC_POSITION) {
4698 /* fragment position input */
4699 type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
4700 interpolationMode = VGPU10_INTERPOLATION_LINEAR;
4701 name = VGPU10_NAME_POSITION;
4702 if (usage_mask & TGSI_WRITEMASK_W) {
4703 /* we need to replace use of 'w' with '1/w' */
4704 emit->fs.fragcoord_input_index = i;
4705 }
4706 }
4707 else if (semantic_name == TGSI_SEMANTIC_FACE) {
4708 /* fragment front-facing input */
4709 type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
4710 interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
4711 name = VGPU10_NAME_IS_FRONT_FACE;
4712 emit->fs.face_input_index = i;
4713 }
4714 else if (semantic_name == TGSI_SEMANTIC_PRIMID) {
4715 /* primitive ID */
4716 type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
4717 interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
4718 name = VGPU10_NAME_PRIMITIVE_ID;
4719 }
4720 else if (semantic_name == TGSI_SEMANTIC_SAMPLEID) {
4721 /* sample index / ID */
4722 type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
4723 interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
4724 name = VGPU10_NAME_SAMPLE_INDEX;
4725 }
4726 else if (semantic_name == TGSI_SEMANTIC_LAYER) {
4727 /* render target array index */
4728 if (emit->key.fs.layer_to_zero) {
4729 /**
4730 * The shader from the previous stage does not write to layer,
4731 * so reading the layer index in fragment shader should return 0.
4732 */
4733 emit->fs.layer_input_index = i;
4734 continue;
4735 } else {
4736 type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
4737 interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
4738 name = VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX;
4739 mask = VGPU10_OPERAND_4_COMPONENT_MASK_X;
4740 }
4741 }
4742 else if (semantic_name == TGSI_SEMANTIC_VIEWPORT_INDEX) {
4743 /* viewport index */
4744 type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
4745 interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
4746 name = VGPU10_NAME_VIEWPORT_ARRAY_INDEX;
4747 mask = VGPU10_OPERAND_4_COMPONENT_MASK_X;
4748 }
4749 else {
4750 /* general fragment input */
4751 type = VGPU10_OPCODE_DCL_INPUT_PS;
4752 interpolationMode =
4753 translate_interpolation(emit,
4754 emit->info.input_interpolate[i],
4755 emit->info.input_interpolate_loc[i]);
4756
4757 /* keeps track if flat interpolation mode is being used */
4758 emit->uses_flat_interp = emit->uses_flat_interp ||
4759 (interpolationMode == VGPU10_INTERPOLATION_CONSTANT);
4760
4761 name = VGPU10_NAME_UNDEFINED;
4762 }
4763
4764 emit_input_declaration(emit, type,
4765 VGPU10_OPERAND_TYPE_INPUT,
4766 VGPU10_OPERAND_INDEX_1D, index, 1,
4767 name,
4768 VGPU10_OPERAND_4_COMPONENT,
4769 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4770 mask,
4771 interpolationMode, TRUE,
4772 map_tgsi_semantic_to_sgn_name(semantic_name));
4773 }
4774 }
4775
4776
4777 /**
4778 * Emit input declarations for vertex shader.
4779 */
4780 static void
4781 emit_vs_input_declarations(struct svga_shader_emitter_v10 *emit)
4782 {
4783 unsigned i;
4784
4785 for (i = 0; i < emit->info.file_max[TGSI_FILE_INPUT] + 1; i++) {
4786 unsigned usage_mask = emit->info.input_usage_mask[i];
4787 unsigned index = i;
4788
4789 if (usage_mask == 0)
4790 continue; /* register is not actually used */
4791
4792 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
4793 VGPU10_OPERAND_TYPE_INPUT,
4794 VGPU10_OPERAND_INDEX_1D, index, 1,
4795 VGPU10_NAME_UNDEFINED,
4796 VGPU10_OPERAND_4_COMPONENT,
4797 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4798 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4799 VGPU10_INTERPOLATION_UNDEFINED, TRUE,
4800 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
4801 }
4802 }
4803
4804
4805 /**
4806 * Emit input declarations for geometry shader.
4807 */
4808 static void
4809 emit_gs_input_declarations(struct svga_shader_emitter_v10 *emit)
4810 {
4811 unsigned i;
4812
4813 for (i = 0; i < emit->info.num_inputs; i++) {
4814 enum tgsi_semantic semantic_name = emit->info.input_semantic_name[i];
4815 unsigned usage_mask = emit->info.input_usage_mask[i];
4816 unsigned index = emit->linkage.input_map[i];
4817 unsigned opcodeType, operandType;
4818 unsigned numComp, selMode;
4819 unsigned name;
4820 unsigned dim;
4821
4822 if (usage_mask == 0)
4823 continue; /* register is not actually used */
4824
4825 opcodeType = VGPU10_OPCODE_DCL_INPUT;
4826 operandType = VGPU10_OPERAND_TYPE_INPUT;
4827 numComp = VGPU10_OPERAND_4_COMPONENT;
4828 selMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
4829 name = VGPU10_NAME_UNDEFINED;
4830
4831 /* all geometry shader inputs are two dimensional except
4832 * gl_PrimitiveID
4833 */
4834 dim = VGPU10_OPERAND_INDEX_2D;
4835
4836 if (semantic_name == TGSI_SEMANTIC_PRIMID) {
4837 /* Primitive ID */
4838 operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
4839 dim = VGPU10_OPERAND_INDEX_0D;
4840 numComp = VGPU10_OPERAND_0_COMPONENT;
4841 selMode = 0;
4842
4843          /* Also save the register index so we can check for the primitive
4844           * id when emitting a src register. We need to modify the operand
4845           * type and index dimension when emitting the primitive id src reg.
4846           */
4847 emit->gs.prim_id_index = i;
4848 }
4849 else if (semantic_name == TGSI_SEMANTIC_POSITION) {
4850 /* vertex position input */
4851 opcodeType = VGPU10_OPCODE_DCL_INPUT_SIV;
4852 name = VGPU10_NAME_POSITION;
4853 }
4854
4855 emit_input_declaration(emit, opcodeType, operandType,
4856 dim, index,
4857 emit->gs.input_size,
4858 name,
4859 numComp, selMode,
4860 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4861 VGPU10_INTERPOLATION_UNDEFINED, TRUE,
4862 map_tgsi_semantic_to_sgn_name(semantic_name));
4863 }
4864 }
4865
4866
4867 /**
4868 * Emit input declarations for tessellation control shader.
4869 */
4870 static void
4871 emit_tcs_input_declarations(struct svga_shader_emitter_v10 *emit)
4872 {
4873 unsigned i;
4874 unsigned size = emit->key.tcs.vertices_per_patch;
4875 unsigned indicesMask = 0;
4876 boolean addSignature = TRUE;
4877
4878 if (!emit->tcs.control_point_phase)
4879 addSignature = emit->tcs.fork_phase_add_signature;
4880
4881 for (i = 0; i < emit->info.num_inputs; i++) {
4882 unsigned usage_mask = emit->info.input_usage_mask[i];
4883 unsigned index = emit->linkage.input_map[i];
4884 enum tgsi_semantic semantic_name = emit->info.input_semantic_name[i];
4885 VGPU10_SYSTEM_NAME name = VGPU10_NAME_UNDEFINED;
4886 VGPU10_OPERAND_TYPE operandType = VGPU10_OPERAND_TYPE_INPUT;
4887 SVGA3dDXSignatureSemanticName sgn_name =
4888 map_tgsi_semantic_to_sgn_name(semantic_name);
4889
4890 /* indices that are declared */
4891 indicesMask |= 1 << index;
4892
4893 if (semantic_name == TGSI_SEMANTIC_POSITION ||
4894 index == emit->linkage.position_index) {
4895 /* save the input control point index for later use */
4896 emit->tcs.control_point_input_index = i;
4897 }
4898 else if (usage_mask == 0) {
4899 continue; /* register is not actually used */
4900 }
4901 else if (semantic_name == TGSI_SEMANTIC_CLIPDIST) {
4902          /* The shadow copy is used here, so set the signature name
4903           * to UNDEFINED.
4904           */
4905 sgn_name = SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED;
4906 }
4907
4908       /* In the patch constant phase, input control points are declared as
4909        * vicp registers rather than v registers.
4910        */
4911 if (!emit->tcs.control_point_phase) {
4912 operandType = VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT;
4913 }
4914
4915 /* Tessellation control shader inputs are two dimensional.
4916 * The array size is determined by the patch vertex count.
4917 */
4918 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
4919 operandType,
4920 VGPU10_OPERAND_INDEX_2D,
4921 index, size, name,
4922 VGPU10_OPERAND_4_COMPONENT,
4923 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4924 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4925 VGPU10_INTERPOLATION_UNDEFINED,
4926 addSignature, sgn_name);
4927 }
4928
4929 if (emit->tcs.control_point_phase) {
4930 if (emit->tcs.control_point_input_index == INVALID_INDEX) {
4931
4932 /* Add input control point declaration if it does not exist */
4933 if ((indicesMask & (1 << emit->linkage.position_index)) == 0) {
4934 emit->linkage.input_map[emit->linkage.num_inputs] =
4935 emit->linkage.position_index;
4936 emit->tcs.control_point_input_index = emit->linkage.num_inputs++;
4937
4938 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
4939 VGPU10_OPERAND_TYPE_INPUT,
4940 VGPU10_OPERAND_INDEX_2D,
4941 emit->linkage.position_index,
4942 emit->key.tcs.vertices_per_patch,
4943 VGPU10_NAME_UNDEFINED,
4944 VGPU10_OPERAND_4_COMPONENT,
4945 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4946 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4947 VGPU10_INTERPOLATION_UNDEFINED, TRUE,
4948 SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION);
4949 }
4950 }
4951
4952 /* Also add an address register for the indirection to the
4953 * input control points
4954 */
4955 emit->tcs.control_point_addr_index = emit->num_address_regs++;
4956 }
4957 }
4958
4959
4960 static void
4961 emit_tessfactor_input_declarations(struct svga_shader_emitter_v10 *emit)
4962 {
4963
4964 /* In tcs, tess factors are emitted as extra outputs.
4965 * The starting register index for the tess factors is captured
4966 * in the compile key.
4967 */
4968 unsigned inputIndex = emit->key.tes.tessfactor_index;
4969
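   /* Declare only the factors the TES actually reads: up to 4 outer and
    * 2 inner factors for quads, 3 outer and 1 inner for triangles, and
    * 2 outer (detail/density) for isolines.
    */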
4970 if (emit->tes.prim_mode == PIPE_PRIM_QUADS) {
4971 if (emit->key.tes.need_tessouter) {
4972 emit->tes.outer.in_index = inputIndex;
4973 for (int i = 0; i < 4; i++) {
4974 emit_tesslevel_declaration(emit, inputIndex++,
4975 VGPU10_OPCODE_DCL_INPUT_SIV,
4976 VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
4977 VGPU10_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + i,
4978 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + i);
4979 }
4980 }
4981
4982 if (emit->key.tes.need_tessinner) {
4983 emit->tes.inner.in_index = inputIndex;
4984 emit_tesslevel_declaration(emit, inputIndex++,
4985 VGPU10_OPCODE_DCL_INPUT_SIV,
4986 VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
4987 VGPU10_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR,
4988 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR);
4989
4990 emit_tesslevel_declaration(emit, inputIndex++,
4991 VGPU10_OPCODE_DCL_INPUT_SIV,
4992 VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
4993 VGPU10_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR,
4994 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR);
4995 }
4996 }
4997 else if (emit->tes.prim_mode == PIPE_PRIM_TRIANGLES) {
4998 if (emit->key.tes.need_tessouter) {
4999 emit->tes.outer.in_index = inputIndex;
5000 for (int i = 0; i < 3; i++) {
5001 emit_tesslevel_declaration(emit, inputIndex++,
5002 VGPU10_OPCODE_DCL_INPUT_SIV,
5003 VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
5004 VGPU10_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + i,
5005 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + i);
5006 }
5007 }
5008
5009 if (emit->key.tes.need_tessinner) {
5010 emit->tes.inner.in_index = inputIndex;
5011 emit_tesslevel_declaration(emit, inputIndex++,
5012 VGPU10_OPCODE_DCL_INPUT_SIV,
5013 VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
5014 VGPU10_NAME_FINAL_TRI_INSIDE_TESSFACTOR,
5015 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_INSIDE_TESSFACTOR);
5016 }
5017 }
5018 else if (emit->tes.prim_mode == PIPE_PRIM_LINES) {
5019 if (emit->key.tes.need_tessouter) {
5020 emit->tes.outer.in_index = inputIndex;
5021 emit_tesslevel_declaration(emit, inputIndex++,
5022 VGPU10_OPCODE_DCL_INPUT_SIV,
5023 VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
5024 VGPU10_NAME_FINAL_LINE_DETAIL_TESSFACTOR,
5025 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DETAIL_TESSFACTOR);
5026
5027 emit_tesslevel_declaration(emit, inputIndex++,
5028 VGPU10_OPCODE_DCL_INPUT_SIV,
5029 VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
5030 VGPU10_NAME_FINAL_LINE_DENSITY_TESSFACTOR,
5031 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DENSITY_TESSFACTOR);
5032 }
5033 }
5034 }
5035
5036
5037 /**
5038 * Emit input declarations for tessellation evaluation shader.
5039 */
5040 static void
5041 emit_tes_input_declarations(struct svga_shader_emitter_v10 *emit)
5042 {
5043 unsigned i;
5044
5045 for (i = 0; i < emit->info.num_inputs; i++) {
5046 unsigned usage_mask = emit->info.input_usage_mask[i];
5047 unsigned index = emit->linkage.input_map[i];
5048 unsigned size;
5049 const enum tgsi_semantic semantic_name =
5050 emit->info.input_semantic_name[i];
5051 SVGA3dDXSignatureSemanticName sgn_name;
5052 VGPU10_OPERAND_TYPE operandType;
5053 VGPU10_OPERAND_INDEX_DIMENSION dim;
5054
5055 if (usage_mask == 0)
5056 usage_mask = 1; /* at least set usage mask to one */
5057
5058 if (semantic_name == TGSI_SEMANTIC_PATCH) {
5059 operandType = VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT;
5060 dim = VGPU10_OPERAND_INDEX_1D;
5061 size = 1;
5062 sgn_name = map_tgsi_semantic_to_sgn_name(semantic_name);
5063 }
5064 else {
5065 operandType = VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT;
5066 dim = VGPU10_OPERAND_INDEX_2D;
5067 size = emit->key.tes.vertices_per_patch;
5068 sgn_name = SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED;
5069 }
5070
5071 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT, operandType,
5072 dim, index, size, VGPU10_NAME_UNDEFINED,
5073 VGPU10_OPERAND_4_COMPONENT,
5074 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
5075 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
5076 VGPU10_INTERPOLATION_UNDEFINED,
5077 TRUE, sgn_name);
5078 }
5079
5080 emit_tessfactor_input_declarations(emit);
5081
5082 /* DX spec requires DS input controlpoint/patch-constant signatures to match
5083 * the HS output controlpoint/patch-constant signatures exactly.
5084 * Add missing input declarations even if they are not used in the shader.
5085 */
5086 if (emit->linkage.num_inputs < emit->linkage.prevShader.num_outputs) {
5087 struct tgsi_shader_info *prevInfo = emit->prevShaderInfo;
5088 for (i = 0; i < emit->linkage.prevShader.num_outputs; i++) {
5089
5090 /* If a tcs output does not have a corresponding input register in
5091 * tes, add one.
5092 */
5093 if (emit->linkage.prevShader.output_map[i] >
5094 emit->linkage.input_map_max) {
5095 const enum tgsi_semantic sem_name = prevInfo->output_semantic_name[i];
5096
5097 if (sem_name == TGSI_SEMANTIC_PATCH) {
5098 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
5099 VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
5100 VGPU10_OPERAND_INDEX_1D,
5101 i, 1, VGPU10_NAME_UNDEFINED,
5102 VGPU10_OPERAND_4_COMPONENT,
5103 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
5104 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
5105 VGPU10_INTERPOLATION_UNDEFINED,
5106 TRUE,
5107 map_tgsi_semantic_to_sgn_name(sem_name));
5108
5109 } else if (sem_name != TGSI_SEMANTIC_TESSINNER &&
5110 sem_name != TGSI_SEMANTIC_TESSOUTER) {
5111 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
5112 VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT,
5113 VGPU10_OPERAND_INDEX_2D,
5114 i, emit->key.tes.vertices_per_patch,
5115 VGPU10_NAME_UNDEFINED,
5116 VGPU10_OPERAND_4_COMPONENT,
5117 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
5118 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
5119 VGPU10_INTERPOLATION_UNDEFINED,
5120 TRUE,
5121 map_tgsi_semantic_to_sgn_name(sem_name));
5122 }
5123 /* tessellation factors are taken care of in
5124 * emit_tessfactor_input_declarations().
5125 */
5126 }
5127 }
5128 }
5129 }
5130
5131
5132 /**
5133 * Emit all input declarations.
5134 */
5135 static boolean
5136 emit_input_declarations(struct svga_shader_emitter_v10 *emit)
5137 {
5138 emit->index_range.required =
5139 emit->info.indirect_files & (1 << TGSI_FILE_INPUT) ? TRUE : FALSE;
5140
5141 switch (emit->unit) {
5142 case PIPE_SHADER_FRAGMENT:
5143 emit_fs_input_declarations(emit);
5144 break;
5145 case PIPE_SHADER_GEOMETRY:
5146 emit_gs_input_declarations(emit);
5147 break;
5148 case PIPE_SHADER_VERTEX:
5149 emit_vs_input_declarations(emit);
5150 break;
5151 case PIPE_SHADER_TESS_CTRL:
5152 emit_tcs_input_declarations(emit);
5153 break;
5154 case PIPE_SHADER_TESS_EVAL:
5155 emit_tes_input_declarations(emit);
5156 break;
5157 case PIPE_SHADER_COMPUTE:
5158 //XXX emit_cs_input_declarations(emit);
5159 break;
5160 default:
5161 assert(0);
5162 }
5163
5164 if (emit->index_range.start_index != INVALID_INDEX) {
5165 emit_index_range_declaration(emit);
5166 }
5167 emit->index_range.required = FALSE;
5168 return TRUE;
5169 }
5170
5171
5172 /**
5173 * Emit all output declarations.
5174 */
5175 static boolean
5176 emit_output_declarations(struct svga_shader_emitter_v10 *emit)
5177 {
5178 emit->index_range.required =
5179 emit->info.indirect_files & (1 << TGSI_FILE_OUTPUT) ? TRUE : FALSE;
5180
5181 switch (emit->unit) {
5182 case PIPE_SHADER_FRAGMENT:
5183 emit_fs_output_declarations(emit);
5184 break;
5185 case PIPE_SHADER_GEOMETRY:
5186 emit_gs_output_declarations(emit);
5187 break;
5188 case PIPE_SHADER_VERTEX:
5189 emit_vs_output_declarations(emit);
5190 break;
5191 case PIPE_SHADER_TESS_CTRL:
5192 emit_tcs_output_declarations(emit);
5193 break;
5194 case PIPE_SHADER_TESS_EVAL:
5195 emit_tes_output_declarations(emit);
5196 break;
5197 case PIPE_SHADER_COMPUTE:
5198 //XXX emit_cs_output_declarations(emit);
5199 break;
5200 default:
5201 assert(0);
5202 }
5203
5204 if (emit->vposition.so_index != INVALID_INDEX &&
5205 emit->vposition.out_index != INVALID_INDEX) {
5206
5207 assert(emit->unit != PIPE_SHADER_FRAGMENT);
5208
5209 /* Emit the declaration for the non-adjusted vertex position
5210 * for stream output purposes.
5211 */
5212 emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT,
5213 emit->vposition.so_index,
5214 VGPU10_NAME_UNDEFINED,
5215 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
5216 TRUE,
5217 SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION);
5218 }
5219
5220 if (emit->clip_dist_so_index != INVALID_INDEX &&
5221 emit->clip_dist_out_index != INVALID_INDEX) {
5222
5223 assert(emit->unit != PIPE_SHADER_FRAGMENT);
5224
5225 /* Emit the declaration for the clip distance shadow copy which
5226 * will be used for stream output and for the clip distance
5227 * varying variable.
5228 */
5229 emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT,
5230 emit->clip_dist_so_index,
5231 VGPU10_NAME_UNDEFINED,
5232 emit->output_usage_mask[emit->clip_dist_out_index],
5233 TRUE,
5234 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
5235
5236 if (emit->info.num_written_clipdistance > 4) {
5237 /* for the second clip distance register, each handles 4 planes */
5238 emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT,
5239 emit->clip_dist_so_index + 1,
5240 VGPU10_NAME_UNDEFINED,
5241 emit->output_usage_mask[emit->clip_dist_out_index+1],
5242 TRUE,
5243 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
5244 }
5245 }
5246
5247 if (emit->index_range.start_index != INVALID_INDEX) {
5248 emit_index_range_declaration(emit);
5249 }
5250 emit->index_range.required = FALSE;
5251 return TRUE;
5252 }
5253
5254
5255 /**
5256 * A helper function to create a temporary indexable array
5257 * and initialize the corresponding entries in the temp_map array.
5258 */
5259 static void
5260 create_temp_array(struct svga_shader_emitter_v10 *emit,
5261 unsigned arrayID, unsigned first, unsigned count,
5262 unsigned startIndex)
5263 {
5264 unsigned i, tempIndex = startIndex;
5265
5266 emit->num_temp_arrays = MAX2(emit->num_temp_arrays, arrayID + 1);
5267 assert(emit->num_temp_arrays <= MAX_TEMP_ARRAYS);
5268 emit->num_temp_arrays = MIN2(emit->num_temp_arrays, MAX_TEMP_ARRAYS);
5269
5270 emit->temp_arrays[arrayID].start = first;
5271 emit->temp_arrays[arrayID].size = count;
5272
5273 /* Fill in the temp_map entries for this temp array */
5274 for (i = 0; i < count; i++, tempIndex++) {
5275 emit->temp_map[tempIndex].arrayId = arrayID;
5276 emit->temp_map[tempIndex].index = i;
5277 }
5278 }
5279
5280
5281 /**
5282 * Emit the declaration for the temporary registers.
5283 */
5284 static boolean
5285 emit_temporaries_declaration(struct svga_shader_emitter_v10 *emit)
5286 {
5287 unsigned total_temps, reg, i;
5288
5289 total_temps = emit->num_shader_temps;
5290
5291 /* If there is indirect access to non-indexable temps in the shader,
5292 * convert those temps to indexable temps. This works around a bug
5293 * in the GLSL->TGSI translator exposed in piglit test
5294 * glsl-1.20/execution/fs-const-array-of-struct-of-array.shader_test.
5295 * Internal temps added by the driver remain as non-indexable temps.
5296 */
5297 if ((emit->info.indirect_files & (1 << TGSI_FILE_TEMPORARY)) &&
5298 emit->num_temp_arrays == 0) {
5299 create_temp_array(emit, 1, 0, total_temps, 0);
5300 }
5301
5302 /* Allocate extra temps for specially-implemented instructions,
5303 * such as LIT.
5304 */
5305 total_temps += MAX_INTERNAL_TEMPS;
5306
5307 /* Allocate extra temps for clip distance or clip vertex.
5308 */
5309 if (emit->clip_mode == CLIP_DISTANCE) {
5310 /* We need to write the clip distance to a temporary register
5311 * first. Then it will be copied to the shadow copy for
5312 * the clip distance varying variable and for stream output.
5313 * It will also be copied to the actual CLIPDIST register
5314 * according to the enabled clip planes.
5315 */
5316 emit->clip_dist_tmp_index = total_temps++;
5317 if (emit->info.num_written_clipdistance > 4)
5318 total_temps++; /* second clip register */
5319 }
5320 else if (emit->clip_mode == CLIP_VERTEX && emit->key.last_vertex_stage) {
5321 /* If the current shader is in the last vertex processing stage,
5322 * we need to convert the TGSI CLIPVERTEX output to one or more
5323 * clip distances. Allocate a temp reg for the clipvertex here.
5324 */
5325 assert(emit->info.writes_clipvertex > 0);
5326 emit->clip_vertex_tmp_index = total_temps;
5327 total_temps++;
5328 }
5329
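   /* Allocate a temp for the vertex id */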
5330 if (emit->info.uses_vertexid) {
5331 assert(emit->unit == PIPE_SHADER_VERTEX);
5332 emit->vs.vertex_id_tmp_index = total_temps++;
5333 }
5334
5335 if (emit->unit == PIPE_SHADER_VERTEX || emit->unit == PIPE_SHADER_GEOMETRY) {
5336 if (emit->vposition.need_prescale || emit->key.vs.undo_viewport ||
5337 emit->key.clip_plane_enable ||
5338 emit->vposition.so_index != INVALID_INDEX) {
5339 emit->vposition.tmp_index = total_temps;
5340 total_temps += 1;
5341 }
5342
5343 if (emit->vposition.need_prescale) {
5344 emit->vposition.prescale_scale_index = total_temps++;
5345 emit->vposition.prescale_trans_index = total_temps++;
5346 }
5347
5348 if (emit->unit == PIPE_SHADER_VERTEX) {
5349 unsigned attrib_mask = (emit->key.vs.adjust_attrib_w_1 |
5350 emit->key.vs.adjust_attrib_itof |
5351 emit->key.vs.adjust_attrib_utof |
5352 emit->key.vs.attrib_is_bgra |
5353 emit->key.vs.attrib_puint_to_snorm |
5354 emit->key.vs.attrib_puint_to_uscaled |
5355 emit->key.vs.attrib_puint_to_sscaled);
5356 while (attrib_mask) {
5357 unsigned index = u_bit_scan(&attrib_mask);
5358 emit->vs.adjusted_input[index] = total_temps++;
5359 }
5360 }
5361 else if (emit->unit == PIPE_SHADER_GEOMETRY) {
5362 if (emit->key.gs.writes_viewport_index)
5363 emit->gs.viewport_index_tmp_index = total_temps++;
5364 }
5365 }
5366 else if (emit->unit == PIPE_SHADER_FRAGMENT) {
5367 if (emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS ||
5368 emit->key.fs.write_color0_to_n_cbufs > 1) {
5369 /* Allocate a temp to hold the output color */
5370 emit->fs.color_tmp_index = total_temps;
5371 total_temps += 1;
5372 }
5373
5374 if (emit->fs.face_input_index != INVALID_INDEX) {
5375 /* Allocate a temp for the +/-1 face register */
5376 emit->fs.face_tmp_index = total_temps;
5377 total_temps += 1;
5378 }
5379
5380 if (emit->fs.fragcoord_input_index != INVALID_INDEX) {
5381 /* Allocate a temp for modified fragment position register */
5382 emit->fs.fragcoord_tmp_index = total_temps;
5383 total_temps += 1;
5384 }
5385
5386 if (emit->fs.sample_pos_sys_index != INVALID_INDEX) {
5387 /* Allocate a temp for the sample position */
5388 emit->fs.sample_pos_tmp_index = total_temps++;
5389 }
5390 }
5391 else if (emit->unit == PIPE_SHADER_TESS_EVAL) {
5392 if (emit->vposition.need_prescale) {
5393 emit->vposition.tmp_index = total_temps++;
5394 emit->vposition.prescale_scale_index = total_temps++;
5395 emit->vposition.prescale_trans_index = total_temps++;
5396 }
5397
5398 if (emit->tes.inner.tgsi_index) {
5399 emit->tes.inner.temp_index = total_temps;
5400 total_temps += 1;
5401 }
5402
5403 if (emit->tes.outer.tgsi_index) {
5404 emit->tes.outer.temp_index = total_temps;
5405 total_temps += 1;
5406 }
5407 }
5408 else if (emit->unit == PIPE_SHADER_TESS_CTRL) {
5409 if (emit->tcs.inner.tgsi_index != INVALID_INDEX) {
5410 if (!emit->tcs.control_point_phase) {
5411 emit->tcs.inner.temp_index = total_temps;
5412 total_temps += 1;
5413 }
5414 }
5415 if (emit->tcs.outer.tgsi_index != INVALID_INDEX) {
5416 if (!emit->tcs.control_point_phase) {
5417 emit->tcs.outer.temp_index = total_temps;
5418 total_temps += 1;
5419 }
5420 }
5421
5422 if (emit->tcs.control_point_phase &&
5423 emit->info.reads_pervertex_outputs) {
5424 emit->tcs.control_point_tmp_index = total_temps;
5425 total_temps += emit->tcs.control_point_out_count;
5426 }
5427 else if (!emit->tcs.control_point_phase &&
5428 emit->info.reads_perpatch_outputs) {
5429
5430 /* If there is indirect access to the patch constant outputs
5431 * in the control point phase, then an indexable temporary array
5432 * will be created for these patch constant outputs.
5433 * Note, indirect access can only be applicable to
5434 * patch constant outputs in the control point phase.
5435 */
5436 if (emit->info.indirect_files & (1 << TGSI_FILE_OUTPUT)) {
5437 unsigned arrayID =
5438 emit->num_temp_arrays ? emit->num_temp_arrays : 1;
5439 create_temp_array(emit, arrayID, 0,
5440 emit->tcs.patch_generic_out_count, total_temps);
5441 }
5442 emit->tcs.patch_generic_tmp_index = total_temps;
5443 total_temps += emit->tcs.patch_generic_out_count;
5444 }
5445
5446 emit->tcs.invocation_id_tmp_index = total_temps++;
5447 }
5448
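   /* Allocate temps to serve as the address registers (for ARL/UARL) */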
5449 for (i = 0; i < emit->num_address_regs; i++) {
5450 emit->address_reg_index[i] = total_temps++;
5451 }
5452
5453 /* Initialize the temp_map array which maps TGSI temp indexes to VGPU10
5454 * temp indexes. Basically, we compact all the non-array temp register
5455 * indexes into a consecutive series.
5456 *
5457 * Before, we may have some TGSI declarations like:
5458 * DCL TEMP[0..1], LOCAL
5459 * DCL TEMP[2..4], ARRAY(1), LOCAL
5460 * DCL TEMP[5..7], ARRAY(2), LOCAL
5461 * plus, some extra temps, like TEMP[8], TEMP[9] for misc things
5462 *
5463 * After, we'll have a map like this:
5464 * temp_map[0] = { array 0, index 0 }
5465 * temp_map[1] = { array 0, index 1 }
5466 * temp_map[2] = { array 1, index 0 }
5467 * temp_map[3] = { array 1, index 1 }
5468 * temp_map[4] = { array 1, index 2 }
5469 * temp_map[5] = { array 2, index 0 }
5470 * temp_map[6] = { array 2, index 1 }
5471 * temp_map[7] = { array 2, index 2 }
5472 * temp_map[8] = { array 0, index 2 }
5473 * temp_map[9] = { array 0, index 3 }
5474 *
5475 * We'll declare two arrays of 3 elements, plus a set of four non-indexed
5476 * temps numbered 0..3
5477 *
5478 * Any time we emit a temporary register index, we'll have to use the
5479 * temp_map[] table to convert the TGSI index to the VGPU10 index.
5480 *
5481 * Finally, we recompute the total_temps value here.
5482 */
5483 reg = 0;
5484 for (i = 0; i < total_temps; i++) {
5485 if (emit->temp_map[i].arrayId == 0) {
5486 emit->temp_map[i].index = reg++;
5487 }
5488 }
5489
5490 if (0) {
5491 debug_printf("total_temps %u\n", total_temps);
5492 for (i = 0; i < total_temps; i++) {
5493 debug_printf("temp %u -> array %u index %u\n",
5494 i, emit->temp_map[i].arrayId, emit->temp_map[i].index);
5495 }
5496 }
5497
5498 total_temps = reg;
5499
5500 /* Emit declaration of ordinary temp registers */
5501 if (total_temps > 0) {
5502 VGPU10OpcodeToken0 opcode0;
5503
5504 opcode0.value = 0;
5505 opcode0.opcodeType = VGPU10_OPCODE_DCL_TEMPS;
5506
5507 begin_emit_instruction(emit);
5508 emit_dword(emit, opcode0.value);
5509 emit_dword(emit, total_temps);
5510 end_emit_instruction(emit);
5511 }
5512
5513 /* Emit declarations for indexable temp arrays. Skip 0th entry since
5514 * it's unused.
5515 */
5516 for (i = 1; i < emit->num_temp_arrays; i++) {
5517 unsigned num_temps = emit->temp_arrays[i].size;
5518
5519 if (num_temps > 0) {
5520 VGPU10OpcodeToken0 opcode0;
5521
5522 opcode0.value = 0;
5523 opcode0.opcodeType = VGPU10_OPCODE_DCL_INDEXABLE_TEMP;
5524
5525 begin_emit_instruction(emit);
5526 emit_dword(emit, opcode0.value);
5527 emit_dword(emit, i); /* which array */
5528 emit_dword(emit, num_temps);
5529 emit_dword(emit, 4); /* num components */
5530 end_emit_instruction(emit);
5531
5532 total_temps += num_temps;
5533 }
5534 }
5535
5536 /* Check that the grand total of all regular and indexed temps is
5537 * under the limit.
5538 */
5539 check_register_index(emit, VGPU10_OPCODE_DCL_TEMPS, total_temps - 1);
5540
5541 return TRUE;
5542 }
5543
5544
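/**
 * Emit the declaration for constant buffer 0, allocating room for the
 * "extra" constants (prescale factors, viewport, clip planes, texcoord
 * scales, etc.) after the shader's own constants, then declare any
 * remaining constant buffers (UBOs).
 */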
5545 static boolean
5546 emit_constant_declaration(struct svga_shader_emitter_v10 *emit)
5547 {
5548 VGPU10OpcodeToken0 opcode0;
5549 VGPU10OperandToken0 operand0;
5550 unsigned total_consts, i;
5551
5552 opcode0.value = 0;
5553 opcode0.opcodeType = VGPU10_OPCODE_DCL_CONSTANT_BUFFER;
5554 opcode0.accessPattern = VGPU10_CB_IMMEDIATE_INDEXED;
5555 /* XXX or, access pattern = VGPU10_CB_DYNAMIC_INDEXED */
5556
5557 operand0.value = 0;
5558 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
5559 operand0.indexDimension = VGPU10_OPERAND_INDEX_2D;
5560 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
5561 operand0.index1Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
5562 operand0.operandType = VGPU10_OPERAND_TYPE_CONSTANT_BUFFER;
5563 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
5564 operand0.swizzleX = 0;
5565 operand0.swizzleY = 1;
5566 operand0.swizzleZ = 2;
5567 operand0.swizzleW = 3;
5568
5569 /**
5570 * Emit declaration for constant buffer [0]. We also allocate
5571 * room for the extra constants here.
5572 */
5573 total_consts = emit->num_shader_consts[0];
5574
5575 /* Now, allocate constant slots for the "extra" constants.
5576 * Note: it's critical that these extra constant locations
5577 * exactly match what's emitted by the "extra" constants code
5578 * in svga_state_constants.c
5579 */
5580
5581 /* Vertex position scale/translation */
5582 if (emit->vposition.need_prescale) {
5583 emit->vposition.prescale_cbuf_index = total_consts;
5584 total_consts += (2 * emit->vposition.num_prescale);
5585 }
5586
5587 if (emit->unit == PIPE_SHADER_VERTEX) {
5588 if (emit->key.vs.undo_viewport) {
5589 emit->vs.viewport_index = total_consts++;
5590 }
5591 if (emit->key.vs.need_vertex_id_bias) {
5592 emit->vs.vertex_id_bias_index = total_consts++;
5593 }
5594 }
5595
5596 /* user-defined clip planes */
5597 if (emit->key.clip_plane_enable) {
5598 unsigned n = util_bitcount(emit->key.clip_plane_enable);
5599 assert(emit->unit != PIPE_SHADER_FRAGMENT &&
5600 emit->unit != PIPE_SHADER_COMPUTE);
5601 for (i = 0; i < n; i++) {
5602 emit->clip_plane_const[i] = total_consts++;
5603 }
5604 }
5605
5606 for (i = 0; i < emit->num_samplers; i++) {
5607
5608 if (emit->sampler_view[i]) {
5609
5610 /* Texcoord scale factors for RECT textures */
5611 if (emit->key.tex[i].unnormalized) {
5612 emit->texcoord_scale_index[i] = total_consts++;
5613 }
5614
5615 /* Texture buffer sizes */
5616 if (emit->sampler_target[i] == TGSI_TEXTURE_BUFFER) {
5617 emit->texture_buffer_size_index[i] = total_consts++;
5618 }
5619 }
5620 }
5621
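   /* Emit the constant buffer 0 declaration with the total element count */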
5622 if (total_consts > 0) {
5623 begin_emit_instruction(emit);
5624 emit_dword(emit, opcode0.value);
5625 emit_dword(emit, operand0.value);
5626 emit_dword(emit, 0); /* which const buffer slot */
5627 emit_dword(emit, total_consts);
5628 end_emit_instruction(emit);
5629 }
5630
5631 /* Declare remaining constant buffers (UBOs) */
5632 for (i = 1; i < ARRAY_SIZE(emit->num_shader_consts); i++) {
5633 if (emit->num_shader_consts[i] > 0) {
5634 begin_emit_instruction(emit);
5635 emit_dword(emit, opcode0.value);
5636 emit_dword(emit, operand0.value);
5637 emit_dword(emit, i); /* which const buffer slot */
5638 emit_dword(emit, emit->num_shader_consts[i]);
5639 end_emit_instruction(emit);
5640 }
5641 }
5642
5643 return TRUE;
5644 }
5645
5646
5647 /**
5648 * Emit declarations for samplers.
5649 */
5650 static boolean
5651 emit_sampler_declarations(struct svga_shader_emitter_v10 *emit)
5652 {
5653 unsigned i;
5654
5655 for (i = 0; i < emit->num_samplers; i++) {
5656 VGPU10OpcodeToken0 opcode0;
5657 VGPU10OperandToken0 operand0;
5658
5659 opcode0.value = 0;
5660 opcode0.opcodeType = VGPU10_OPCODE_DCL_SAMPLER;
5661 opcode0.samplerMode = VGPU10_SAMPLER_MODE_DEFAULT;
5662
5663 operand0.value = 0;
5664 operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
5665 operand0.operandType = VGPU10_OPERAND_TYPE_SAMPLER;
5666 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
5667 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
5668
5669 begin_emit_instruction(emit);
5670 emit_dword(emit, opcode0.value);
5671 emit_dword(emit, operand0.value);
5672 emit_dword(emit, i);
5673 end_emit_instruction(emit);
5674 }
5675
5676 return TRUE;
5677 }
5678
5679
5680 /**
5681 * Translate TGSI_TEXTURE_x to VGPU10_RESOURCE_DIMENSION_x.
5682 */
5683 static unsigned
5684 tgsi_texture_to_resource_dimension(enum tgsi_texture_type target,
5685 unsigned num_samples,
5686 boolean is_array)
5687 {
5688 if (target == TGSI_TEXTURE_2D_MSAA && num_samples < 2) {
5689 target = TGSI_TEXTURE_2D;
5690 }
5691 else if (target == TGSI_TEXTURE_2D_ARRAY_MSAA && num_samples < 2) {
5692 target = TGSI_TEXTURE_2D_ARRAY;
5693 }
5694
5695 switch (target) {
5696 case TGSI_TEXTURE_BUFFER:
5697 return VGPU10_RESOURCE_DIMENSION_BUFFER;
5698 case TGSI_TEXTURE_1D:
5699 return VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
5700 case TGSI_TEXTURE_2D:
5701 case TGSI_TEXTURE_RECT:
5702 return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
5703 case TGSI_TEXTURE_3D:
5704 return VGPU10_RESOURCE_DIMENSION_TEXTURE3D;
5705 case TGSI_TEXTURE_CUBE:
5706 case TGSI_TEXTURE_SHADOWCUBE:
5707 return VGPU10_RESOURCE_DIMENSION_TEXTURECUBE;
5708 case TGSI_TEXTURE_SHADOW1D:
5709 return VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
5710 case TGSI_TEXTURE_SHADOW2D:
5711 case TGSI_TEXTURE_SHADOWRECT:
5712 return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
5713 case TGSI_TEXTURE_1D_ARRAY:
5714 case TGSI_TEXTURE_SHADOW1D_ARRAY:
5715 return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE1DARRAY
5716 : VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
5717 case TGSI_TEXTURE_2D_ARRAY:
5718 case TGSI_TEXTURE_SHADOW2D_ARRAY:
5719 return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY
5720 : VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
5721 case TGSI_TEXTURE_2D_MSAA:
5722 return VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS;
5723 case TGSI_TEXTURE_2D_ARRAY_MSAA:
5724 return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DMSARRAY
5725 : VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS;
5726 case TGSI_TEXTURE_CUBE_ARRAY:
5727 case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
5728 return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY
5729 : VGPU10_RESOURCE_DIMENSION_TEXTURECUBE;
5730 default:
5731 assert(!"Unexpected resource type");
5732 return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
5733 }
5734 }
5735
5736
5737 /**
5738 * Given a tgsi_return_type, return true iff it is an integer type.
5739 */
5740 static boolean
5741 is_integer_type(enum tgsi_return_type type)
5742 {
5743 switch (type) {
5744 case TGSI_RETURN_TYPE_SINT:
5745 case TGSI_RETURN_TYPE_UINT:
5746 return TRUE;
5747 case TGSI_RETURN_TYPE_FLOAT:
5748 case TGSI_RETURN_TYPE_UNORM:
5749 case TGSI_RETURN_TYPE_SNORM:
5750 return FALSE;
5751 case TGSI_RETURN_TYPE_COUNT:
5752 default:
5753 assert(!"is_integer_type: Unknown tgsi_return_type");
5754 return FALSE;
5755 }
5756 }
5757
5758
5759 /**
5760 * Emit declarations for resources.
5761 * XXX When we're sure that all TGSI shaders will be generated with
5762 * sampler view declarations (Ex: DCL SVIEW[n], 2D, UINT) we may
5763 * rework this code.
5764 */
5765 static boolean
5766 emit_resource_declarations(struct svga_shader_emitter_v10 *emit)
5767 {
5768 unsigned i;
5769
5770 /* Emit resource decl for each sampler */
5771 for (i = 0; i < emit->num_samplers; i++) {
5772 VGPU10OpcodeToken0 opcode0;
5773 VGPU10OperandToken0 operand0;
5774 VGPU10ResourceReturnTypeToken return_type;
5775 VGPU10_RESOURCE_RETURN_TYPE rt;
5776
5777 opcode0.value = 0;
5778 opcode0.opcodeType = VGPU10_OPCODE_DCL_RESOURCE;
5779 opcode0.resourceDimension =
5780 tgsi_texture_to_resource_dimension(emit->sampler_target[i],
5781 emit->key.tex[i].num_samples,
5782 emit->key.tex[i].is_array);
5783 opcode0.sampleCount = emit->key.tex[i].num_samples;
5784 operand0.value = 0;
5785 operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
5786 operand0.operandType = VGPU10_OPERAND_TYPE_RESOURCE;
5787 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
5788 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
5789
5790 #if 1
5791 /* convert TGSI_RETURN_TYPE_x to VGPU10_RETURN_TYPE_x */
5792 STATIC_ASSERT(VGPU10_RETURN_TYPE_UNORM == TGSI_RETURN_TYPE_UNORM + 1);
5793 STATIC_ASSERT(VGPU10_RETURN_TYPE_SNORM == TGSI_RETURN_TYPE_SNORM + 1);
5794 STATIC_ASSERT(VGPU10_RETURN_TYPE_SINT == TGSI_RETURN_TYPE_SINT + 1);
5795 STATIC_ASSERT(VGPU10_RETURN_TYPE_UINT == TGSI_RETURN_TYPE_UINT + 1);
5796 STATIC_ASSERT(VGPU10_RETURN_TYPE_FLOAT == TGSI_RETURN_TYPE_FLOAT + 1);
5797 assert(emit->sampler_return_type[i] <= TGSI_RETURN_TYPE_FLOAT);
5798 rt = emit->sampler_return_type[i] + 1;
5799 #else
5800 switch (emit->sampler_return_type[i]) {
5801 case TGSI_RETURN_TYPE_UNORM: rt = VGPU10_RETURN_TYPE_UNORM; break;
5802 case TGSI_RETURN_TYPE_SNORM: rt = VGPU10_RETURN_TYPE_SNORM; break;
5803 case TGSI_RETURN_TYPE_SINT: rt = VGPU10_RETURN_TYPE_SINT; break;
5804 case TGSI_RETURN_TYPE_UINT: rt = VGPU10_RETURN_TYPE_UINT; break;
5805 case TGSI_RETURN_TYPE_FLOAT: rt = VGPU10_RETURN_TYPE_FLOAT; break;
5806 case TGSI_RETURN_TYPE_COUNT:
5807 default:
5808 rt = VGPU10_RETURN_TYPE_FLOAT;
5809 assert(!"emit_resource_declarations: Unknown tgsi_return_type");
5810 }
5811 #endif
5812
5813 return_type.value = 0;
5814 return_type.component0 = rt;
5815 return_type.component1 = rt;
5816 return_type.component2 = rt;
5817 return_type.component3 = rt;
5818
5819 begin_emit_instruction(emit);
5820 emit_dword(emit, opcode0.value);
5821 emit_dword(emit, operand0.value);
5822 emit_dword(emit, i);
5823 emit_dword(emit, return_type.value);
5824 end_emit_instruction(emit);
5825 }
5826
5827 return TRUE;
5828 }
5829
5830 /**
5831 * Emit instruction with n=1, 2 or 3 source registers.
5832 */
5833 static void
5834 emit_instruction_opn(struct svga_shader_emitter_v10 *emit,
5835 unsigned opcode,
5836 const struct tgsi_full_dst_register *dst,
5837 const struct tgsi_full_src_register *src1,
5838 const struct tgsi_full_src_register *src2,
5839 const struct tgsi_full_src_register *src3,
5840 boolean saturate, bool precise)
5841 {
5842 begin_emit_instruction(emit);
5843 emit_opcode_precise(emit, opcode, saturate, precise);
5844 emit_dst_register(emit, dst);
5845 emit_src_register(emit, src1);
5846 if (src2) {
5847 emit_src_register(emit, src2);
5848 }
5849 if (src3) {
5850 emit_src_register(emit, src3);
5851 }
5852 end_emit_instruction(emit);
5853 }
5854
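/** Emit an instruction with one source register. */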
5855 static void
5856 emit_instruction_op1(struct svga_shader_emitter_v10 *emit,
5857 unsigned opcode,
5858 const struct tgsi_full_dst_register *dst,
5859 const struct tgsi_full_src_register *src)
5860 {
5861 emit_instruction_opn(emit, opcode, dst, src, NULL, NULL, FALSE, FALSE);
5862 }
5863
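/** Emit an instruction with two source registers. */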
5864 static void
5865 emit_instruction_op2(struct svga_shader_emitter_v10 *emit,
5866 VGPU10_OPCODE_TYPE opcode,
5867 const struct tgsi_full_dst_register *dst,
5868 const struct tgsi_full_src_register *src1,
5869 const struct tgsi_full_src_register *src2)
5870 {
5871 emit_instruction_opn(emit, opcode, dst, src1, src2, NULL, FALSE, FALSE);
5872 }
5873
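/** Emit an instruction with three source registers. */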
5874 static void
5875 emit_instruction_op3(struct svga_shader_emitter_v10 *emit,
5876 VGPU10_OPCODE_TYPE opcode,
5877 const struct tgsi_full_dst_register *dst,
5878 const struct tgsi_full_src_register *src1,
5879 const struct tgsi_full_src_register *src2,
5880 const struct tgsi_full_src_register *src3)
5881 {
5882 emit_instruction_opn(emit, opcode, dst, src1, src2, src3, FALSE, FALSE);
5883 }
5884
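/** Emit an instruction with no destination or source registers. */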
5885 static void
5886 emit_instruction_op0(struct svga_shader_emitter_v10 *emit,
5887 VGPU10_OPCODE_TYPE opcode)
5888 {
5889 begin_emit_instruction(emit);
5890 emit_opcode(emit, opcode, FALSE);
5891 end_emit_instruction(emit);
5892 }
5893
5894 /**
5895 * Tessellation inner/outer levels need to be stored into the
5896 * appropriate registers depending on prim_mode.
5897 */
5898 static void
5899 store_tesslevels(struct svga_shader_emitter_v10 *emit)
5900 {
5901 int i;
5902
5903 /* Tessellation levels are required outputs of the hull shader.
5904 * Emit the inner/outer tessellation levels, either from the
5905 * values provided in the TCS or from the fallback default value of 1.0.
5906 */
5907 if (emit->key.tcs.prim_mode == PIPE_PRIM_QUADS) {
5908 struct tgsi_full_src_register temp_src;
5909
5910 if (emit->tcs.inner.tgsi_index != INVALID_INDEX)
5911 temp_src = make_src_temp_reg(emit->tcs.inner.temp_index);
5912 else
5913 temp_src = make_immediate_reg_float(emit, 1.0f);
5914
5915 for (i = 0; i < 2; i++) {
5916 struct tgsi_full_src_register src =
5917 scalar_src(&temp_src, TGSI_SWIZZLE_X + i);
5918 struct tgsi_full_dst_register dst =
5919 make_dst_reg(TGSI_FILE_OUTPUT, emit->tcs.inner.out_index + i);
5920 dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
5921 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
5922 }
5923
5924 if (emit->tcs.outer.tgsi_index != INVALID_INDEX)
5925 temp_src = make_src_temp_reg(emit->tcs.outer.temp_index);
5926 else
5927 temp_src = make_immediate_reg_float(emit, 1.0f);
5928
5929 for (i = 0; i < 4; i++) {
5930 struct tgsi_full_src_register src =
5931 scalar_src(&temp_src, TGSI_SWIZZLE_X + i);
5932 struct tgsi_full_dst_register dst =
5933 make_dst_reg(TGSI_FILE_OUTPUT, emit->tcs.outer.out_index + i);
5934 dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
5935 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
5936 }
5937 }
5938 else if (emit->key.tcs.prim_mode == PIPE_PRIM_TRIANGLES) {
5939 struct tgsi_full_src_register temp_src;
5940
5941 if (emit->tcs.inner.tgsi_index != INVALID_INDEX)
5942 temp_src = make_src_temp_reg(emit->tcs.inner.temp_index);
5943 else
5944 temp_src = make_immediate_reg_float(emit, 1.0f);
5945
5946 struct tgsi_full_src_register src =
5947 scalar_src(&temp_src, TGSI_SWIZZLE_X);
5948 struct tgsi_full_dst_register dst =
5949 make_dst_reg(TGSI_FILE_OUTPUT, emit->tcs.inner.out_index);
5950 dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
5951 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
5952
5953 if (emit->tcs.outer.tgsi_index != INVALID_INDEX)
5954 temp_src = make_src_temp_reg(emit->tcs.outer.temp_index);
5955 else
5956 temp_src = make_immediate_reg_float(emit, 1.0f);
5957
5958 for (i = 0; i < 3; i++) {
5959 struct tgsi_full_src_register src =
5960 scalar_src(&temp_src, TGSI_SWIZZLE_X + i);
5961 struct tgsi_full_dst_register dst =
5962 make_dst_reg(TGSI_FILE_OUTPUT, emit->tcs.outer.out_index + i);
5963 dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
5964 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
5965 }
5966 }
5967 else if (emit->key.tcs.prim_mode == PIPE_PRIM_LINES) {
5968 if (emit->tcs.outer.tgsi_index != INVALID_INDEX) {
5969 struct tgsi_full_src_register temp_src =
5970 make_src_temp_reg(emit->tcs.outer.temp_index);
5971 for (i = 0; i < 2; i++) {
5972 struct tgsi_full_src_register src =
5973 scalar_src(&temp_src, TGSI_SWIZZLE_X + i);
5974 struct tgsi_full_dst_register dst =
5975 make_dst_reg(TGSI_FILE_OUTPUT,
5976 emit->tcs.outer.out_index + i);
5977 dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
5978 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
5979 }
5980 }
5981 }
5982 else {
5983 debug_printf("Unsupported primitive type");
5984 }
5985 }
5986
5987
5988 /**
5989 * Emit the actual clip distance instructions used for clipping:
5990 * copy the clip distances from the temporary registers to the
5991 * CLIPDIST registers, using the enabled-planes mask as the writemask.
5992 * Also copy the clip distances from the temporary to the clip distance
5993 * shadow copy register, which will be read as an input by the next shader.
5994 */
5995 static void
5996 emit_clip_distance_instructions(struct svga_shader_emitter_v10 *emit)
5997 {
5998 struct tgsi_full_src_register tmp_clip_dist_src;
5999 struct tgsi_full_dst_register clip_dist_dst;
6000
6001 unsigned i;
6002 unsigned clip_plane_enable = emit->key.clip_plane_enable;
6003 unsigned clip_dist_tmp_index = emit->clip_dist_tmp_index;
6004 int num_written_clipdist = emit->info.num_written_clipdistance;
6005
6006 assert(emit->clip_dist_out_index != INVALID_INDEX);
6007 assert(emit->clip_dist_tmp_index != INVALID_INDEX);
6008
6009 /**
6010 * Temporarily reset the temporary clip dist register index so
6011 * that the copy to the real clip dist register will not
6012 * attempt to copy to the temporary register again.
6013 */
6014 emit->clip_dist_tmp_index = INVALID_INDEX;
6015
6016 for (i = 0; i < 2 && num_written_clipdist > 0; i++, num_written_clipdist-=4) {
6017
6018 tmp_clip_dist_src = make_src_temp_reg(clip_dist_tmp_index + i);
6019
6020 /**
6021 * Copy to the shadow copy for use by the varying variable and
6022 * stream output. All clip distances will be written regardless
6023 * of the enabled clipping planes.
6024 */
6025 clip_dist_dst = make_dst_reg(TGSI_FILE_OUTPUT,
6026 emit->clip_dist_so_index + i);
6027
6028 /* MOV clip_dist_so, tmp_clip_dist */
6029 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &clip_dist_dst,
6030 &tmp_clip_dist_src);
6031
6032 /**
6033 * Copy the clip distances for the enabled clipping planes
6034 * to the CLIPDIST registers used for clipping.
6035 */
6036 if (clip_plane_enable & 0xf) {
6037 clip_dist_dst = make_dst_reg(TGSI_FILE_OUTPUT,
6038 emit->clip_dist_out_index + i);
6039 clip_dist_dst = writemask_dst(&clip_dist_dst, clip_plane_enable & 0xf);
6040
6041 /* MOV CLIPDIST, tmp_clip_dist */
6042 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &clip_dist_dst,
6043 &tmp_clip_dist_src);
6044 }
6045 /* four clip planes per clip register */
6046 clip_plane_enable >>= 4;
6047 }
6048 /**
6049 * set the temporary clip dist register index back to the
6050 * temporary index for the next vertex
6051 */
6052 emit->clip_dist_tmp_index = clip_dist_tmp_index;
6053 }
6054
6055 /* Declare clip distance output registers for user-defined clip planes
6056 * or the TGSI_CLIPVERTEX output.
6057 */
6058 static void
6059 emit_clip_distance_declarations(struct svga_shader_emitter_v10 *emit)
6060 {
6061 unsigned num_clip_planes = util_bitcount(emit->key.clip_plane_enable);
6062 unsigned index = emit->num_outputs;
6063 unsigned plane_mask;
6064
6065 assert(emit->unit != PIPE_SHADER_FRAGMENT);
6066 assert(num_clip_planes <= 8);
6067
6068 if (emit->clip_mode != CLIP_LEGACY &&
6069 emit->clip_mode != CLIP_VERTEX) {
6070 return;
6071 }
6072
6073 if (num_clip_planes == 0)
6074 return;
6075
6076 /* Convert clip vertex to clip distances only in the last vertex stage */
6077 if (!emit->key.last_vertex_stage)
6078 return;
6079
6080 /* Declare one or two clip output registers. The number of components
6081 * in the mask reflects the number of clip planes. For example, if 5
6082 * clip planes are needed, we'll declare outputs similar to:
6083 * dcl_output_siv o2.xyzw, clip_distance
6084 * dcl_output_siv o3.x, clip_distance
6085 */
6086 emit->clip_dist_out_index = index; /* save the starting clip dist reg index */
6087
6088 plane_mask = (1 << num_clip_planes) - 1;
6089 if (plane_mask & 0xf) {
6090 unsigned cmask = plane_mask & VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
6091 emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV, index,
6092 VGPU10_NAME_CLIP_DISTANCE, cmask, TRUE,
6093 SVGADX_SIGNATURE_SEMANTIC_NAME_CLIP_DISTANCE);
6094 emit->num_outputs++;
6095 }
6096 if (plane_mask & 0xf0) {
6097 unsigned cmask = (plane_mask >> 4) & VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
6098 emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV, index + 1,
6099 VGPU10_NAME_CLIP_DISTANCE, cmask, TRUE,
6100 SVGADX_SIGNATURE_SEMANTIC_NAME_CLIP_DISTANCE);
6101 emit->num_outputs++;
6102 }
6103 }
6104
6105
6106 /**
6107 * Emit the instructions for writing to the clip distance registers
6108 * to handle legacy/automatic clip planes.
6109 * For each clip plane, the distance is the dot product of the vertex
6110 * position (found in TEMP[vpos_tmp_index]) and the clip plane coefficients.
6111 * This is not used when the shader already has explicit CLIPVERTEX or
6112 * CLIPDISTANCE output registers declared.
6113 */
6114 static void
6115 emit_clip_distance_from_vpos(struct svga_shader_emitter_v10 *emit,
6116 unsigned vpos_tmp_index)
6117 {
6118 unsigned i, num_clip_planes = util_bitcount(emit->key.clip_plane_enable);
6119
6120 assert(emit->clip_mode == CLIP_LEGACY);
6121 assert(num_clip_planes <= 8);
6122
6123 assert(emit->unit == PIPE_SHADER_VERTEX ||
6124 emit->unit == PIPE_SHADER_GEOMETRY ||
6125 emit->unit == PIPE_SHADER_TESS_EVAL);
6126
6127 for (i = 0; i < num_clip_planes; i++) {
6128 struct tgsi_full_dst_register dst;
6129 struct tgsi_full_src_register plane_src, vpos_src;
6130 unsigned reg_index = emit->clip_dist_out_index + i / 4;
6131 unsigned comp = i % 4;
6132 unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_X << comp;
6133
6134 /* create dst, src regs */
6135 dst = make_dst_reg(TGSI_FILE_OUTPUT, reg_index);
6136 dst = writemask_dst(&dst, writemask);
6137
6138 plane_src = make_src_const_reg(emit->clip_plane_const[i]);
6139 vpos_src = make_src_temp_reg(vpos_tmp_index);
6140
6141 /* DP4 clip_dist, plane, vpos */
6142 emit_instruction_op2(emit, VGPU10_OPCODE_DP4, &dst,
6143 &plane_src, &vpos_src);
6144 }
6145 }
6146
6147
6148 /**
6149 * Emit the instructions for computing the clip distance results from
6150 * the clip vertex temporary.
6151 * For each clip plane, the distance is the dot product of the clip vertex
6152 * position (found in a temp reg) and the clip plane coefficients.
6153 */
6154 static void
6155 emit_clip_vertex_instructions(struct svga_shader_emitter_v10 *emit)
6156 {
6157 const unsigned num_clip = util_bitcount(emit->key.clip_plane_enable);
6158 unsigned i;
6159 struct tgsi_full_dst_register dst;
6160 struct tgsi_full_src_register clipvert_src;
6161 const unsigned clip_vertex_tmp = emit->clip_vertex_tmp_index;
6162
6163 assert(emit->unit == PIPE_SHADER_VERTEX ||
6164 emit->unit == PIPE_SHADER_GEOMETRY ||
6165 emit->unit == PIPE_SHADER_TESS_EVAL);
6166
6167 assert(emit->clip_mode == CLIP_VERTEX);
6168
6169 clipvert_src = make_src_temp_reg(clip_vertex_tmp);
6170
6171 for (i = 0; i < num_clip; i++) {
6172 struct tgsi_full_src_register plane_src;
6173 unsigned reg_index = emit->clip_dist_out_index + i / 4;
6174 unsigned comp = i % 4;
6175 unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_X << comp;
6176
6177 /* create dst, src regs */
6178 dst = make_dst_reg(TGSI_FILE_OUTPUT, reg_index);
6179 dst = writemask_dst(&dst, writemask);
6180
6181 plane_src = make_src_const_reg(emit->clip_plane_const[i]);
6182
6183 /* DP4 clip_dist, plane, clipvert */
6184 emit_instruction_op2(emit, VGPU10_OPCODE_DP4, &dst,
6185 &plane_src, &clipvert_src);
6186 }
6187
6188 /* copy temporary clip vertex register to the clip vertex register */
6189
6190 assert(emit->clip_vertex_out_index != INVALID_INDEX);
6191
6192 /**
6193 * Temporarily reset the temporary clip vertex register index so
6194 * that the copy to the clip vertex register will not attempt
6195 * to copy to the temporary register again.
6196 */
6197 emit->clip_vertex_tmp_index = INVALID_INDEX;
6198
6199 /* MOV clip_vertex, clip_vertex_tmp */
6200 dst = make_dst_reg(TGSI_FILE_OUTPUT, emit->clip_vertex_out_index);
6201 emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
6202 &dst, &clipvert_src);
6203
6204 /**
6205 * set the temporary clip vertex register index back to the
6206 * temporary index for the next vertex
6207 */
6208 emit->clip_vertex_tmp_index = clip_vertex_tmp;
6209 }
6210
6211 /**
6212 * Emit code to convert RGBA to BGRA
6213 */
6214 static void
6215 emit_swap_r_b(struct svga_shader_emitter_v10 *emit,
6216 const struct tgsi_full_dst_register *dst,
6217 const struct tgsi_full_src_register *src)
6218 {
6219 struct tgsi_full_src_register bgra_src =
6220 swizzle_src(src, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_X, TGSI_SWIZZLE_W);
6221
6222 begin_emit_instruction(emit);
6223 emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE);
6224 emit_dst_register(emit, dst);
6225 emit_src_register(emit, &bgra_src);
6226 end_emit_instruction(emit);
6227 }
6228
6229
6230 /** Convert from 10_10_10_2 normalized to 10_10_10_2_snorm */
6231 static void
6232 emit_puint_to_snorm(struct svga_shader_emitter_v10 *emit,
6233 const struct tgsi_full_dst_register *dst,
6234 const struct tgsi_full_src_register *src)
6235 {
6236 struct tgsi_full_src_register half = make_immediate_reg_float(emit, 0.5f);
6237 struct tgsi_full_src_register two =
6238 make_immediate_reg_float4(emit, 2.0f, 2.0f, 2.0f, 3.0f);
6239 struct tgsi_full_src_register neg_two =
6240 make_immediate_reg_float4(emit, -2.0f, -2.0f, -2.0f, -1.66666f);
6241
6242 unsigned val_tmp = get_temp_index(emit);
6243 struct tgsi_full_dst_register val_dst = make_dst_temp_reg(val_tmp);
6244 struct tgsi_full_src_register val_src = make_src_temp_reg(val_tmp);
6245
6246 unsigned bias_tmp = get_temp_index(emit);
6247 struct tgsi_full_dst_register bias_dst = make_dst_temp_reg(bias_tmp);
6248 struct tgsi_full_src_register bias_src = make_src_temp_reg(bias_tmp);
6249
6250 /* val = src * 2.0 */
6251 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &val_dst, src, &two);
6252
6253 /* bias = src > 0.5 */
6254 emit_instruction_op2(emit, VGPU10_OPCODE_GE, &bias_dst, src, &half);
6255
6256 /* bias = bias & -2.0 */
6257 emit_instruction_op2(emit, VGPU10_OPCODE_AND, &bias_dst,
6258 &bias_src, &neg_two);
6259
6260 /* dst = val + bias */
6261 emit_instruction_op2(emit, VGPU10_OPCODE_ADD, dst,
6262 &val_src, &bias_src);
6263
6264 free_temp_indexes(emit);
6265 }
6266
6267
6268 /** Convert from 10_10_10_2_unorm to 10_10_10_2_uscaled */
6269 static void
6270 emit_puint_to_uscaled(struct svga_shader_emitter_v10 *emit,
6271 const struct tgsi_full_dst_register *dst,
6272 const struct tgsi_full_src_register *src)
6273 {
6274 struct tgsi_full_src_register scale =
6275 make_immediate_reg_float4(emit, 1023.0f, 1023.0f, 1023.0f, 3.0f);
6276
6277 /* dst = src * scale */
6278 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, dst, src, &scale);
6279 }
6280
6281
6282 /** Convert from R32_UINT to 10_10_10_2_sscaled */
6283 static void
6284 emit_puint_to_sscaled(struct svga_shader_emitter_v10 *emit,
6285 const struct tgsi_full_dst_register *dst,
6286 const struct tgsi_full_src_register *src)
6287 {
6288 struct tgsi_full_src_register lshift =
6289 make_immediate_reg_int4(emit, 22, 12, 2, 0);
6290 struct tgsi_full_src_register rshift =
6291 make_immediate_reg_int4(emit, 22, 22, 22, 30);
6292
6293 struct tgsi_full_src_register src_xxxx = scalar_src(src, TGSI_SWIZZLE_X);
6294
6295 unsigned tmp = get_temp_index(emit);
6296 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
6297 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
6298
6299 /*
6300 * r = (pixel << 22) >> 22; # signed int in [-512, 511]
6301 * g = (pixel << 12) >> 22; # signed int in [-512, 511]
6302 * b = (pixel << 2) >> 22; # signed int in [-512, 511]
6303 * a = (pixel << 0) >> 30; # signed int in [-2, 1]
6304 * dst = i_to_f(r,g,b,a); # convert to float
6305 */
6306 emit_instruction_op2(emit, VGPU10_OPCODE_ISHL, &tmp_dst,
6307 &src_xxxx, &lshift);
6308 emit_instruction_op2(emit, VGPU10_OPCODE_ISHR, &tmp_dst,
6309 &tmp_src, &rshift);
6310 emit_instruction_op1(emit, VGPU10_OPCODE_ITOF, dst, &tmp_src);
6311
6312 free_temp_indexes(emit);
6313 }
6314
6315
6316 /**
6317 * Emit code for TGSI_OPCODE_ARL or TGSI_OPCODE_UARL instruction.
6318 */
6319 static boolean
6320 emit_arl_uarl(struct svga_shader_emitter_v10 *emit,
6321 const struct tgsi_full_instruction *inst)
6322 {
6323 unsigned index = inst->Dst[0].Register.Index;
6324 struct tgsi_full_dst_register dst;
6325 VGPU10_OPCODE_TYPE opcode;
6326
6327 assert(index < MAX_VGPU10_ADDR_REGS);
6328 dst = make_dst_temp_reg(emit->address_reg_index[index]);
6329 dst = writemask_dst(&dst, inst->Dst[0].Register.WriteMask);
6330
6331 /* ARL dst, s0
6332 * Translates into:
6333 * FTOI address_tmp, s0
6334 *
6335 * UARL dst, s0
6336 * Translates into:
6337 * MOV address_tmp, s0
6338 */
6339 if (inst->Instruction.Opcode == TGSI_OPCODE_ARL)
6340 opcode = VGPU10_OPCODE_FTOI;
6341 else
6342 opcode = VGPU10_OPCODE_MOV;
6343
6344 emit_instruction_op1(emit, opcode, &dst, &inst->Src[0]);
6345
6346 return TRUE;
6347 }
6348
6349
6350 /**
6351 * Emit code for TGSI_OPCODE_CAL instruction.
6352 */
6353 static boolean
6354 emit_cal(struct svga_shader_emitter_v10 *emit,
6355 const struct tgsi_full_instruction *inst)
6356 {
6357 unsigned label = inst->Label.Label;
6358 VGPU10OperandToken0 operand;
6359 operand.value = 0;
6360 operand.operandType = VGPU10_OPERAND_TYPE_LABEL;
6361
6362 begin_emit_instruction(emit);
6363 emit_dword(emit, operand.value);
6364 emit_dword(emit, label);
6365 end_emit_instruction(emit);
6366
6367 return TRUE;
6368 }
6369
6370
6371 /**
6372 * Emit code for TGSI_OPCODE_IABS instruction.
6373 */
6374 static boolean
6375 emit_iabs(struct svga_shader_emitter_v10 *emit,
6376 const struct tgsi_full_instruction *inst)
6377 {
6378 /* dst.x = (src0.x < 0) ? -src0.x : src0.x
6379 * dst.y = (src0.y < 0) ? -src0.y : src0.y
6380 * dst.z = (src0.z < 0) ? -src0.z : src0.z
6381 * dst.w = (src0.w < 0) ? -src0.w : src0.w
6382 *
6383 * Translates into
6384 * IMAX dst, src, neg(src)
6385 */
6386 struct tgsi_full_src_register neg_src = negate_src(&inst->Src[0]);
6387 emit_instruction_op2(emit, VGPU10_OPCODE_IMAX, &inst->Dst[0],
6388 &inst->Src[0], &neg_src);
6389
6390 return TRUE;
6391 }
6392
6393
6394 /**
6395 * Emit code for TGSI_OPCODE_CMP instruction.
6396 */
6397 static boolean
6398 emit_cmp(struct svga_shader_emitter_v10 *emit,
6399 const struct tgsi_full_instruction *inst)
6400 {
6401 /* dst.x = (src0.x < 0) ? src1.x : src2.x
6402 * dst.y = (src0.y < 0) ? src1.y : src2.y
6403 * dst.z = (src0.z < 0) ? src1.z : src2.z
6404 * dst.w = (src0.w < 0) ? src1.w : src2.w
6405 *
6406 * Translates into
6407 * LT tmp, src0, 0.0
6408 * MOVC dst, tmp, src1, src2
6409 */
6410 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
6411 unsigned tmp = get_temp_index(emit);
6412 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
6413 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
6414
6415 emit_instruction_opn(emit, VGPU10_OPCODE_LT, &tmp_dst,
6416 &inst->Src[0], &zero, NULL, FALSE,
6417 inst->Instruction.Precise);
6418 emit_instruction_opn(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0],
6419 &tmp_src, &inst->Src[1], &inst->Src[2],
6420 inst->Instruction.Saturate, FALSE);
6421
6422 free_temp_indexes(emit);
6423
6424 return TRUE;
6425 }
6426
6427
6428 /**
6429 * Emit code for TGSI_OPCODE_DST instruction.
6430 */
6431 static boolean
6432 emit_dst(struct svga_shader_emitter_v10 *emit,
6433 const struct tgsi_full_instruction *inst)
6434 {
6435 /*
6436 * dst.x = 1
6437 * dst.y = src0.y * src1.y
6438 * dst.z = src0.z
6439 * dst.w = src1.w
6440 */
6441
6442 struct tgsi_full_src_register s0_yyyy =
6443 scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y);
6444 struct tgsi_full_src_register s0_zzzz =
6445 scalar_src(&inst->Src[0], TGSI_SWIZZLE_Z);
6446 struct tgsi_full_src_register s1_yyyy =
6447 scalar_src(&inst->Src[1], TGSI_SWIZZLE_Y);
6448 struct tgsi_full_src_register s1_wwww =
6449 scalar_src(&inst->Src[1], TGSI_SWIZZLE_W);
6450
6451 /*
6452 * If dst and either src0 or src1 are the same, we need
6453 * to create a temporary for it and insert an extra move.
6454 */
6455 unsigned tmp_move = get_temp_index(emit);
6456 struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move);
6457 struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move);
6458
6459 /* MOV dst.x, 1.0 */
6460 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
6461 struct tgsi_full_dst_register dst_x =
6462 writemask_dst(&move_dst, TGSI_WRITEMASK_X);
6463 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
6464
6465 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &one);
6466 }
6467
6468 /* MUL dst.y, s0.y, s1.y */
6469 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
6470 struct tgsi_full_dst_register dst_y =
6471 writemask_dst(&move_dst, TGSI_WRITEMASK_Y);
6472
6473 emit_instruction_opn(emit, VGPU10_OPCODE_MUL, &dst_y, &s0_yyyy,
6474 &s1_yyyy, NULL, inst->Instruction.Saturate,
6475 inst->Instruction.Precise);
6476 }
6477
6478 /* MOV dst.z, s0.z */
6479 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
6480 struct tgsi_full_dst_register dst_z =
6481 writemask_dst(&move_dst, TGSI_WRITEMASK_Z);
6482
6483 emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
6484 &dst_z, &s0_zzzz, NULL, NULL,
6485 inst->Instruction.Saturate,
6486 inst->Instruction.Precise);
6487 }
6488
6489 /* MOV dst.w, s1.w */
6490 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
6491 struct tgsi_full_dst_register dst_w =
6492 writemask_dst(&move_dst, TGSI_WRITEMASK_W);
6493
6494 emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
6495 &dst_w, &s1_wwww, NULL, NULL,
6496 inst->Instruction.Saturate,
6497 inst->Instruction.Precise);
6498 }
6499
6500 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src);
6501 free_temp_indexes(emit);
6502
6503 return TRUE;
6504 }
6505
6506
6507 /**
6508 * A helper function to return the stream index as specified in
6509 * the immediate register
6510 */
6511 static inline unsigned
6512 find_stream_index(struct svga_shader_emitter_v10 *emit,
6513 const struct tgsi_full_src_register *src)
6514 {
6515 return emit->immediates[src->Register.Index][src->Register.SwizzleX].Int;
6516 }
6517
6518
6519 /**
6520 * Emit code for TGSI_OPCODE_ENDPRIM (GS only)
6521 */
6522 static boolean
6523 emit_endprim(struct svga_shader_emitter_v10 *emit,
6524 const struct tgsi_full_instruction *inst)
6525 {
6526 assert(emit->unit == PIPE_SHADER_GEOMETRY);
6527
6528 begin_emit_instruction(emit);
6529 if (emit->version >= 50) {
6530 unsigned streamIndex = find_stream_index(emit, &inst->Src[0]);
6531
6532 if (emit->info.num_stream_output_components[streamIndex] == 0) {
6533 /**
6534 * If there is no output for this stream, discard this instruction.
6535 */
6536 emit->discard_instruction = TRUE;
6537 }
6538 else {
6539 emit_opcode(emit, VGPU10_OPCODE_CUT_STREAM, FALSE);
6540 assert(inst->Src[0].Register.File == TGSI_FILE_IMMEDIATE);
6541 emit_stream_register(emit, streamIndex);
6542 }
6543 }
6544 else {
6545 emit_opcode(emit, VGPU10_OPCODE_CUT, FALSE);
6546 }
6547 end_emit_instruction(emit);
6548 return TRUE;
6549 }
6550
6551
6552 /**
6553 * Emit code for TGSI_OPCODE_EX2 (2^x) instruction.
6554 */
6555 static boolean
6556 emit_ex2(struct svga_shader_emitter_v10 *emit,
6557 const struct tgsi_full_instruction *inst)
6558 {
6559 /* Note that TGSI_OPCODE_EX2 computes only one value from src.x
6560 * while VGPU10 computes four values.
6561 *
6562 * dst = EX2(src):
6563 * dst.xyzw = 2.0 ^ src.x
6564 */
6565
6566 struct tgsi_full_src_register src_xxxx =
6567 swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
6568 TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
6569
6570 /* EXP dst, s0.xxxx */
6571 emit_instruction_opn(emit, VGPU10_OPCODE_EXP, &inst->Dst[0], &src_xxxx,
6572 NULL, NULL,
6573 inst->Instruction.Saturate,
6574 inst->Instruction.Precise);
6575
6576 return TRUE;
6577 }
6578
6579
6580 /**
6581 * Emit code for TGSI_OPCODE_EXP instruction.
6582 */
6583 static boolean
6584 emit_exp(struct svga_shader_emitter_v10 *emit,
6585 const struct tgsi_full_instruction *inst)
6586 {
6587 /*
6588 * dst.x = 2 ^ floor(s0.x)
6589 * dst.y = s0.x - floor(s0.x)
6590 * dst.z = 2 ^ s0.x
6591 * dst.w = 1.0
6592 */
6593
6594 struct tgsi_full_src_register src_xxxx =
6595 scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
6596 unsigned tmp = get_temp_index(emit);
6597 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
6598 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
6599
6600 /*
6601 * If dst and src are the same we need to create
6602 * a temporary for it and insert an extra move.
6603 */
6604 unsigned tmp_move = get_temp_index(emit);
6605 struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move);
6606 struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move);
6607
6608 /* only use X component of temp reg */
6609 tmp_dst = writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
6610 tmp_src = scalar_src(&tmp_src, TGSI_SWIZZLE_X);
6611
6612 /* ROUND_NI tmp.x, s0.x */
6613 emit_instruction_op1(emit, VGPU10_OPCODE_ROUND_NI, &tmp_dst,
6614 &src_xxxx); /* round to -infinity */
6615
6616 /* EXP dst.x, tmp.x */
6617 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
6618 struct tgsi_full_dst_register dst_x =
6619 writemask_dst(&move_dst, TGSI_WRITEMASK_X);
6620
6621 emit_instruction_opn(emit, VGPU10_OPCODE_EXP, &dst_x, &tmp_src,
6622 NULL, NULL,
6623 inst->Instruction.Saturate,
6624 inst->Instruction.Precise);
6625 }
6626
6627 /* ADD dst.y, s0.x, -tmp */
6628 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
6629 struct tgsi_full_dst_register dst_y =
6630 writemask_dst(&move_dst, TGSI_WRITEMASK_Y);
6631 struct tgsi_full_src_register neg_tmp_src = negate_src(&tmp_src);
6632
6633 emit_instruction_opn(emit, VGPU10_OPCODE_ADD, &dst_y, &src_xxxx,
6634 &neg_tmp_src, NULL,
6635 inst->Instruction.Saturate,
6636 inst->Instruction.Precise);
6637 }
6638
6639 /* EXP dst.z, s0.x */
6640 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
6641 struct tgsi_full_dst_register dst_z =
6642 writemask_dst(&move_dst, TGSI_WRITEMASK_Z);
6643
6644 emit_instruction_opn(emit, VGPU10_OPCODE_EXP, &dst_z, &src_xxxx,
6645 NULL, NULL,
6646 inst->Instruction.Saturate,
6647 inst->Instruction.Precise);
6648 }
6649
6650 /* MOV dst.w, 1.0 */
6651 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
6652 struct tgsi_full_dst_register dst_w =
6653 writemask_dst(&move_dst, TGSI_WRITEMASK_W);
6654 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
6655
6656 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one);
6657 }
6658
6659 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src);
6660
6661 free_temp_indexes(emit);
6662
6663 return TRUE;
6664 }
6665
6666
6667 /**
6668 * Emit code for TGSI_OPCODE_IF instruction.
6669 */
6670 static boolean
6671 emit_if(struct svga_shader_emitter_v10 *emit,
6672 const struct tgsi_full_src_register *src)
6673 {
6674 VGPU10OpcodeToken0 opcode0;
6675
6676 /* The src register should be a scalar */
6677 assert(src->Register.SwizzleX == src->Register.SwizzleY &&
6678 src->Register.SwizzleX == src->Register.SwizzleZ &&
6679 src->Register.SwizzleX == src->Register.SwizzleW);
6680
6681 /* The only special thing here is that we need to set the
6682 * VGPU10_INSTRUCTION_TEST_NONZERO flag since we want to test if
6683 * src.x is non-zero.
6684 */
6685 opcode0.value = 0;
6686 opcode0.opcodeType = VGPU10_OPCODE_IF;
6687 opcode0.testBoolean = VGPU10_INSTRUCTION_TEST_NONZERO;
6688
6689 begin_emit_instruction(emit);
6690 emit_dword(emit, opcode0.value);
6691 emit_src_register(emit, src);
6692 end_emit_instruction(emit);
6693
6694 return TRUE;
6695 }
6696
6697
6698 /**
6699 * Emit code for TGSI_OPCODE_KILL_IF instruction (kill fragment if any of
6700 * the register components are negative).
6701 */
6702 static boolean
6703 emit_kill_if(struct svga_shader_emitter_v10 *emit,
6704 const struct tgsi_full_instruction *inst)
6705 {
6706 unsigned tmp = get_temp_index(emit);
6707 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
6708 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
6709
6710 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
6711
6712 struct tgsi_full_dst_register tmp_dst_x =
6713 writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
6714 struct tgsi_full_src_register tmp_src_xxxx =
6715 scalar_src(&tmp_src, TGSI_SWIZZLE_X);
6716
6717 /* tmp = src[0] < 0.0 */
6718 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[0], &zero);
6719
6720 if (!same_swizzle_terms(&inst->Src[0])) {
6721 /* If the swizzle is not XXXX, YYYY, ZZZZ or WWWW we need to
6722 * logically OR the swizzle terms. Most uses of KILL_IF only
6723 * test one channel so it's good to avoid these extra steps.
6724 */
6725 struct tgsi_full_src_register tmp_src_yyyy =
6726 scalar_src(&tmp_src, TGSI_SWIZZLE_Y);
6727 struct tgsi_full_src_register tmp_src_zzzz =
6728 scalar_src(&tmp_src, TGSI_SWIZZLE_Z);
6729 struct tgsi_full_src_register tmp_src_wwww =
6730 scalar_src(&tmp_src, TGSI_SWIZZLE_W);
6731
6732 emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx,
6733 &tmp_src_yyyy);
6734 emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx,
6735 &tmp_src_zzzz);
6736 emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx,
6737 &tmp_src_wwww);
6738 }
6739
6740 begin_emit_instruction(emit);
6741 emit_discard_opcode(emit, TRUE); /* discard if tmp.x is non-zero */
6742 emit_src_register(emit, &tmp_src_xxxx);
6743 end_emit_instruction(emit);
6744
6745 free_temp_indexes(emit);
6746
6747 return TRUE;
6748 }
6749
6750
6751 /**
6752 * Emit code for TGSI_OPCODE_KILL instruction (unconditional discard).
6753 */
6754 static boolean
6755 emit_kill(struct svga_shader_emitter_v10 *emit,
6756 const struct tgsi_full_instruction *inst)
6757 {
6758 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
6759
6760 /* DISCARD if 0.0 is zero */
6761 begin_emit_instruction(emit);
6762 emit_discard_opcode(emit, FALSE);
6763 emit_src_register(emit, &zero);
6764 end_emit_instruction(emit);
6765
6766 return TRUE;
6767 }
6768
6769
6770 /**
6771 * Emit code for TGSI_OPCODE_LG2 instruction.
6772 */
6773 static boolean
6774 emit_lg2(struct svga_shader_emitter_v10 *emit,
6775 const struct tgsi_full_instruction *inst)
6776 {
6777 /* Note that TGSI_OPCODE_LG2 computes only one value from src.x
6778 * while VGPU10 computes four values.
6779 *
6780 * dst = LG2(src):
6781 * dst.xyzw = log2(src.x)
6782 */
6783
6784 struct tgsi_full_src_register src_xxxx =
6785 swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
6786 TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
6787
6788 /* LOG dst, s0.xxxx */
6789 emit_instruction_opn(emit, VGPU10_OPCODE_LOG,
6790 &inst->Dst[0], &src_xxxx, NULL, NULL,
6791 inst->Instruction.Saturate,
6792 inst->Instruction.Precise);
6793
6794 return TRUE;
6795 }
6796
6797
6798 /**
6799 * Emit code for TGSI_OPCODE_LIT instruction.
6800 */
6801 static boolean
6802 emit_lit(struct svga_shader_emitter_v10 *emit,
6803 const struct tgsi_full_instruction *inst)
6804 {
6805 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
6806
6807 /*
6808 * If dst and src are the same register we need to write
6809 * to a temporary and insert an extra move at the end.
6810 */
6811 unsigned tmp_move = get_temp_index(emit);
6812 struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move);
6813 struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move);
6814
6815 /*
6816 * dst.x = 1
6817 * dst.y = max(src.x, 0)
6818 * dst.z = (src.x > 0) ? max(src.y, 0)^{clamp(src.w, -128, 128)} : 0
6819 * dst.w = 1
6820 */
6821
6822 /* MOV dst.x, 1.0 */
6823 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
6824 struct tgsi_full_dst_register dst_x =
6825 writemask_dst(&move_dst, TGSI_WRITEMASK_X);
6826 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &one);
6827 }
6828
6829 /* MOV dst.w, 1.0 */
6830 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
6831 struct tgsi_full_dst_register dst_w =
6832 writemask_dst(&move_dst, TGSI_WRITEMASK_W);
6833 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one);
6834 }
6835
6836 /* MAX dst.y, src.x, 0.0 */
6837 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
6838 struct tgsi_full_dst_register dst_y =
6839 writemask_dst(&move_dst, TGSI_WRITEMASK_Y);
6840 struct tgsi_full_src_register zero =
6841 make_immediate_reg_float(emit, 0.0f);
6842 struct tgsi_full_src_register src_xxxx =
6843 swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
6844 TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
6845
6846 emit_instruction_opn(emit, VGPU10_OPCODE_MAX, &dst_y, &src_xxxx,
6847 &zero, NULL, inst->Instruction.Saturate, FALSE);
6848 }
6849
6850 /*
6851 * tmp1 = clamp(src.w, -128, 128);
6852 * MAX tmp1, src.w, -128
6853 * MIN tmp1, tmp1, 128
6854 *
6855 * tmp2 = max(src.y, 0);
6856 * MAX tmp2, src.y, 0
6857 *
6858 * tmp1 = pow(tmp2, tmp1);
6859 * LOG tmp2, tmp2
6860 * MUL tmp1, tmp2, tmp1
6861 * EXP tmp1, tmp1
6862 *
6863 * tmp1 = (src.w == 0) ? 1 : tmp1;
6864 * EQ tmp2, 0, src.w
6865 * MOVC tmp1, tmp2, 1.0, tmp1
6866 *
6867 * dst.z = (0 < src.x) ? tmp1 : 0;
6868 * LT tmp2, 0, src.x
6869 * MOVC dst.z, tmp2, tmp1, 0.0
6870 */
6871 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
6872 struct tgsi_full_dst_register dst_z =
6873 writemask_dst(&move_dst, TGSI_WRITEMASK_Z);
6874
6875 unsigned tmp1 = get_temp_index(emit);
6876 struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1);
6877 struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1);
6878 unsigned tmp2 = get_temp_index(emit);
6879 struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2);
6880 struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2);
6881
6882 struct tgsi_full_src_register src_xxxx =
6883 scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
6884 struct tgsi_full_src_register src_yyyy =
6885 scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y);
6886 struct tgsi_full_src_register src_wwww =
6887 scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
6888
6889 struct tgsi_full_src_register zero =
6890 make_immediate_reg_float(emit, 0.0f);
6891 struct tgsi_full_src_register lowerbound =
6892 make_immediate_reg_float(emit, -128.0f);
6893 struct tgsi_full_src_register upperbound =
6894 make_immediate_reg_float(emit, 128.0f);
6895
6896 emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &tmp1_dst, &src_wwww,
6897 &lowerbound);
6898 emit_instruction_op2(emit, VGPU10_OPCODE_MIN, &tmp1_dst, &tmp1_src,
6899 &upperbound);
6900 emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &tmp2_dst, &src_yyyy,
6901 &zero);
6902
6903 /* POW tmp1, tmp2, tmp1 */
6904 /* LOG tmp2, tmp2 */
6905 emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp2_dst, &tmp2_src);
6906
6907 /* MUL tmp1, tmp2, tmp1 */
6908 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst, &tmp2_src,
6909 &tmp1_src);
6910
6911 /* EXP tmp1, tmp1 */
6912 emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &tmp1_dst, &tmp1_src);
6913
6914 /* EQ tmp2, 0, src.w */
6915 emit_instruction_op2(emit, VGPU10_OPCODE_EQ, &tmp2_dst, &zero, &src_wwww);
6916 /* MOVC tmp1, tmp2, 1.0, tmp1 */
6917 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &tmp1_dst,
6918 &tmp2_src, &one, &tmp1_src);
6919
6920 /* LT tmp2, 0, src.x */
6921 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp2_dst, &zero, &src_xxxx);
6922 /* MOVC dst.z, tmp2, tmp1, 0.0 */
6923 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &dst_z,
6924 &tmp2_src, &tmp1_src, &zero);
6925 }
6926
6927 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src);
6928 free_temp_indexes(emit);
6929
6930 return TRUE;
6931 }
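
/*
 * Worked example (illustrative only): for src = (4.0, 2.0, _, 3.0) the
 * formulas above give dst = (1.0, 4.0, 8.0, 1.0), since
 * dst.y = max(4.0, 0) = 4.0 and dst.z = max(2.0, 0)^clamp(3.0, -128, 128)
 * = 2^3 = 8.0.
 */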
6932
6933
6934 /**
6935 * Emit Level Of Detail Query (LODQ) instruction.
6936 */
6937 static boolean
6938 emit_lodq(struct svga_shader_emitter_v10 *emit,
6939 const struct tgsi_full_instruction *inst)
6940 {
6941 const uint unit = inst->Src[1].Register.Index;
6942
6943 assert(emit->version >= 41);
6944
6945 /* LOD dst, coord, resource, sampler */
6946 begin_emit_instruction(emit);
6947 emit_opcode(emit, VGPU10_OPCODE_LOD, FALSE);
6948 emit_dst_register(emit, &inst->Dst[0]);
6949 emit_src_register(emit, &inst->Src[0]); /* coord */
6950 emit_resource_register(emit, unit);
6951 emit_sampler_register(emit, unit);
6952 end_emit_instruction(emit);
6953
6954 return TRUE;
6955 }
6956
6957
6958 /**
6959 * Emit code for TGSI_OPCODE_LOG instruction.
6960 */
6961 static boolean
6962 emit_log(struct svga_shader_emitter_v10 *emit,
6963 const struct tgsi_full_instruction *inst)
6964 {
6965 /*
6966 * dst.x = floor(lg2(abs(s0.x)))
6967 * dst.y = abs(s0.x) / (2 ^ floor(lg2(abs(s0.x))))
6968 * dst.z = lg2(abs(s0.x))
6969 * dst.w = 1.0
6970 */
6971
6972 struct tgsi_full_src_register src_xxxx =
6973 scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
6974 unsigned tmp = get_temp_index(emit);
6975 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
6976 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
6977 struct tgsi_full_src_register abs_src_xxxx = absolute_src(&src_xxxx);
6978
6979 /* only use X component of temp reg */
6980 tmp_dst = writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
6981 tmp_src = scalar_src(&tmp_src, TGSI_SWIZZLE_X);
6982
6983 /* LOG tmp.x, abs(s0.x) */
6984 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XYZ) {
6985 emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp_dst, &abs_src_xxxx);
6986 }
6987
6988 /* MOV dst.z, tmp.x */
6989 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
6990 struct tgsi_full_dst_register dst_z =
6991 writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Z);
6992
6993 emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
6994 &dst_z, &tmp_src, NULL, NULL,
6995 inst->Instruction.Saturate, FALSE);
6996 }
6997
6998 /* FLR tmp.x, tmp.x */
6999 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) {
7000 emit_instruction_op1(emit, VGPU10_OPCODE_ROUND_NI, &tmp_dst, &tmp_src);
7001 }
7002
7003 /* MOV dst.x, tmp.x */
7004 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
7005 struct tgsi_full_dst_register dst_x =
7006 writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_X);
7007
7008 emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
7009 &dst_x, &tmp_src, NULL, NULL,
7010 inst->Instruction.Saturate, FALSE);
7011 }
7012
7013 /* EXP tmp.x, tmp.x */
7014 /* DIV dst.y, abs(s0.x), tmp.x */
7015 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
7016 struct tgsi_full_dst_register dst_y =
7017 writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Y);
7018
7019 emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &tmp_dst, &tmp_src);
7020 emit_instruction_opn(emit, VGPU10_OPCODE_DIV, &dst_y, &abs_src_xxxx,
7021 &tmp_src, NULL, inst->Instruction.Saturate, FALSE);
7022 }
7023
7024 /* MOV dst.w, 1.0 */
7025 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
7026 struct tgsi_full_dst_register dst_w =
7027 writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_W);
7028 struct tgsi_full_src_register one =
7029 make_immediate_reg_float(emit, 1.0f);
7030
7031 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one);
7032 }
7033
7034 free_temp_indexes(emit);
7035
7036 return TRUE;
7037 }
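
/*
 * Worked example (illustrative only): for s0.x = 12.0 the sequence above
 * produces dst.z = lg2(12) ~= 3.585, dst.x = floor(3.585) = 3.0,
 * dst.y = 12 / 2^3 = 1.5 and dst.w = 1.0.
 */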
7038
7039
7040 /**
7041 * Emit code for TGSI_OPCODE_LRP instruction.
7042 */
7043 static boolean
7044 emit_lrp(struct svga_shader_emitter_v10 *emit,
7045 const struct tgsi_full_instruction *inst)
7046 {
7047 /* dst = LRP(s0, s1, s2):
7048 * dst = s0 * (s1 - s2) + s2
7049 * Translates into:
7050 * ADD tmp, s1, -s2; tmp = s1 - s2
7051 * MAD dst, s0, tmp, s2; dst = s0 * tmp + s2
7052 */
7053 unsigned tmp = get_temp_index(emit);
7054 struct tgsi_full_src_register src_tmp = make_src_temp_reg(tmp);
7055 struct tgsi_full_dst_register dst_tmp = make_dst_temp_reg(tmp);
7056 struct tgsi_full_src_register neg_src2 = negate_src(&inst->Src[2]);
7057
7058 /* ADD tmp, s1, -s2 */
7059 emit_instruction_opn(emit, VGPU10_OPCODE_ADD, &dst_tmp,
7060 &inst->Src[1], &neg_src2, NULL, FALSE,
7061 inst->Instruction.Precise);
7062
7063 /* MAD dst, s0, tmp, s2 */
7064 emit_instruction_opn(emit, VGPU10_OPCODE_MAD, &inst->Dst[0],
7065 &inst->Src[0], &src_tmp, &inst->Src[2],
7066 inst->Instruction.Saturate,
7067 inst->Instruction.Precise);
7068
7069 free_temp_indexes(emit);
7070
7071 return TRUE;
7072 }
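
/*
 * Worked example (illustrative only): LRP with s0 = 0.25, s1 = 10.0 and
 * s2 = 2.0 emits ADD tmp, 10.0, -2.0 (tmp = 8.0) followed by
 * MAD dst, 0.25, tmp, 2.0, giving dst = 4.0, i.e. a linear blend from s2
 * toward s1 by the factor s0.
 */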
7073
7074
7075 /**
7076 * Emit code for TGSI_OPCODE_POW instruction.
7077 */
7078 static boolean
7079 emit_pow(struct svga_shader_emitter_v10 *emit,
7080 const struct tgsi_full_instruction *inst)
7081 {
7082 /* Note that TGSI_OPCODE_POW computes only one value from src0.x and
7083 * src1.x while VGPU10 computes four values.
7084 *
7085 * dst = POW(src0, src1):
7086 * dst.xyzw = src0.x ^ src1.x
7087 */
7088 unsigned tmp = get_temp_index(emit);
7089 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7090 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7091 struct tgsi_full_src_register src0_xxxx =
7092 swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
7093 TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
7094 struct tgsi_full_src_register src1_xxxx =
7095 swizzle_src(&inst->Src[1], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
7096 TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
7097
7098 /* LOG tmp, s0.xxxx */
7099 emit_instruction_opn(emit, VGPU10_OPCODE_LOG,
7100 &tmp_dst, &src0_xxxx, NULL, NULL,
7101 FALSE, inst->Instruction.Precise);
7102
7103 /* MUL tmp, tmp, s1.xxxx */
7104 emit_instruction_opn(emit, VGPU10_OPCODE_MUL,
7105 &tmp_dst, &tmp_src, &src1_xxxx, NULL,
7106 FALSE, inst->Instruction.Precise);
7107
7108 /* EXP dst, tmp */
7109 emit_instruction_opn(emit, VGPU10_OPCODE_EXP,
7110 &inst->Dst[0], &tmp_src, NULL, NULL,
7111 inst->Instruction.Saturate,
7112 inst->Instruction.Precise);
7113
7114 /* free tmp */
7115 free_temp_indexes(emit);
7116
7117 return TRUE;
7118 }
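
/*
 * Worked example (illustrative only): POW with src0.x = 2.0 and
 * src1.x = 10.0 computes LOG tmp = 1.0, MUL tmp = 10.0, EXP dst = 1024.0,
 * i.e. dst.xyzw = 2^10 via the identity x^y = 2^(y * log2(x)).
 */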
7119
7120
7121 /**
7122 * Emit code for TGSI_OPCODE_RCP (reciprocal) instruction.
7123 */
7124 static boolean
7125 emit_rcp(struct svga_shader_emitter_v10 *emit,
7126 const struct tgsi_full_instruction *inst)
7127 {
7128 if (emit->version >= 50) {
7129 /* Use the RCP instruction directly. But VGPU10_OPCODE_RCP is
7130 * component-wise while TGSI_OPCODE_RCP computes dst.xyzw = 1.0 / src.x,
7131 * so we need to manipulate the src register's swizzle.
7132 */
7133 struct tgsi_full_src_register src = inst->Src[0];
7134 src.Register.SwizzleY =
7135 src.Register.SwizzleZ =
7136 src.Register.SwizzleW = src.Register.SwizzleX;
7137
7138 begin_emit_instruction(emit);
7139 emit_opcode_precise(emit, VGPU10_OPCODE_RCP,
7140 inst->Instruction.Saturate,
7141 inst->Instruction.Precise);
7142 emit_dst_register(emit, &inst->Dst[0]);
7143 emit_src_register(emit, &src);
7144 end_emit_instruction(emit);
7145 }
7146 else {
7147 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
7148
7149 unsigned tmp = get_temp_index(emit);
7150 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7151 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7152
7153 struct tgsi_full_dst_register tmp_dst_x =
7154 writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
7155 struct tgsi_full_src_register tmp_src_xxxx =
7156 scalar_src(&tmp_src, TGSI_SWIZZLE_X);
7157
7158 /* DIV tmp.x, 1.0, s0 */
7159 emit_instruction_opn(emit, VGPU10_OPCODE_DIV,
7160 &tmp_dst_x, &one, &inst->Src[0], NULL,
7161 FALSE, inst->Instruction.Precise);
7162
7163 /* MOV dst, tmp.xxxx */
7164 emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
7165 &inst->Dst[0], &tmp_src_xxxx, NULL, NULL,
7166 inst->Instruction.Saturate,
7167 inst->Instruction.Precise);
7168
7169 free_temp_indexes(emit);
7170 }
7171
7172 return TRUE;
7173 }
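
/*
 * Illustrative sketch of the two paths above: when emit->version >= 50,
 * RCP dst, src.xxxx is emitted directly (the first swizzle component is
 * replicated across .yzw by rewriting the source swizzle), while on older
 * devices the same result comes from
 *   DIV tmp.x, {1.0}, src
 *   MOV dst, tmp.xxxx
 */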
7174
7175
7176 /**
7177 * Emit code for TGSI_OPCODE_RSQ instruction.
7178 */
7179 static boolean
7180 emit_rsq(struct svga_shader_emitter_v10 *emit,
7181 const struct tgsi_full_instruction *inst)
7182 {
7183 /* dst = RSQ(src):
7184 * dst.xyzw = 1 / sqrt(src.x)
7185 * Translates into:
7186 * RSQ tmp, src.x
7187 * MOV dst, tmp.xxxx
7188 */
7189
7190 unsigned tmp = get_temp_index(emit);
7191 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7192 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7193
7194 struct tgsi_full_dst_register tmp_dst_x =
7195 writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
7196 struct tgsi_full_src_register tmp_src_xxxx =
7197 scalar_src(&tmp_src, TGSI_SWIZZLE_X);
7198
7199 /* RSQ tmp, src.x */
7200 emit_instruction_opn(emit, VGPU10_OPCODE_RSQ,
7201 &tmp_dst_x, &inst->Src[0], NULL, NULL,
7202 FALSE, inst->Instruction.Precise);
7203
7204 /* MOV dst, tmp.xxxx */
7205 emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
7206 &inst->Dst[0], &tmp_src_xxxx, NULL, NULL,
7207 inst->Instruction.Saturate,
7208 inst->Instruction.Precise);
7209
7210 /* free tmp */
7211 free_temp_indexes(emit);
7212
7213 return TRUE;
7214 }
7215
7216
7217 /**
7218 * Emit code for TGSI_OPCODE_SEQ (Set Equal) instruction.
7219 */
7220 static boolean
7221 emit_seq(struct svga_shader_emitter_v10 *emit,
7222 const struct tgsi_full_instruction *inst)
7223 {
7224 /* dst = SEQ(s0, s1):
7225 * dst = s0 == s1 ? 1.0 : 0.0 (per component)
7226 * Translates into:
7227 * EQ tmp, s0, s1; tmp = s0 == s1 ? 0xffffffff : 0 (per comp)
7228 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component)
7229 */
7230 unsigned tmp = get_temp_index(emit);
7231 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7232 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7233 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
7234 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
7235
7236 /* EQ tmp, s0, s1 */
7237 emit_instruction_op2(emit, VGPU10_OPCODE_EQ, &tmp_dst, &inst->Src[0],
7238 &inst->Src[1]);
7239
7240 /* MOVC dst, tmp, one, zero */
7241 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
7242 &one, &zero);
7243
7244 free_temp_indexes(emit);
7245
7246 return TRUE;
7247 }
7248
7249
7250 /**
7251 * Emit code for TGSI_OPCODE_SGE (Set Greater than or Equal) instruction.
7252 */
7253 static boolean
7254 emit_sge(struct svga_shader_emitter_v10 *emit,
7255 const struct tgsi_full_instruction *inst)
7256 {
7257 /* dst = SGE(s0, s1):
7258 * dst = s0 >= s1 ? 1.0 : 0.0 (per component)
7259 * Translates into:
7260 * GE tmp, s0, s1; tmp = s0 >= s1 ? 0xffffffff : 0 (per comp)
7261 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component)
7262 */
7263 unsigned tmp = get_temp_index(emit);
7264 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7265 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7266 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
7267 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
7268
7269 /* GE tmp, s0, s1 */
7270 emit_instruction_op2(emit, VGPU10_OPCODE_GE, &tmp_dst, &inst->Src[0],
7271 &inst->Src[1]);
7272
7273 /* MOVC dst, tmp, one, zero */
7274 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
7275 &one, &zero);
7276
7277 free_temp_indexes(emit);
7278
7279 return TRUE;
7280 }
7281
7282
7283 /**
7284 * Emit code for TGSI_OPCODE_SGT (Set Greater than) instruction.
7285 */
7286 static boolean
7287 emit_sgt(struct svga_shader_emitter_v10 *emit,
7288 const struct tgsi_full_instruction *inst)
7289 {
7290 /* dst = SGT(s0, s1):
7291 * dst = s0 > s1 ? 1.0 : 0.0 (per component)
7292 * Translates into:
7293 * LT tmp, s1, s0; tmp = s1 < s0 ? 0xffffffff : 0 (per comp)
7294 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component)
7295 */
7296 unsigned tmp = get_temp_index(emit);
7297 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7298 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7299 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
7300 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
7301
7302 /* LT tmp, s1, s0 */
7303 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[1],
7304 &inst->Src[0]);
7305
7306 /* MOVC dst, tmp, one, zero */
7307 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
7308 &one, &zero);
7309
7310 free_temp_indexes(emit);
7311
7312 return TRUE;
7313 }
7314
7315
7316 /**
7317 * Emit code for TGSI_OPCODE_SIN and TGSI_OPCODE_COS instructions.
7318 */
7319 static boolean
7320 emit_sincos(struct svga_shader_emitter_v10 *emit,
7321 const struct tgsi_full_instruction *inst)
7322 {
7323 unsigned tmp = get_temp_index(emit);
7324 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7325 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7326
7327 struct tgsi_full_src_register tmp_src_xxxx =
7328 scalar_src(&tmp_src, TGSI_SWIZZLE_X);
7329 struct tgsi_full_dst_register tmp_dst_x =
7330 writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
7331
7332 begin_emit_instruction(emit);
7333 emit_opcode(emit, VGPU10_OPCODE_SINCOS, FALSE);
7334
7335 if (inst->Instruction.Opcode == TGSI_OPCODE_SIN)
7336 {
7337 emit_dst_register(emit, &tmp_dst_x); /* first destination register */
7338 emit_null_dst_register(emit); /* second destination register */
7339 }
7340 else {
7341 emit_null_dst_register(emit);
7342 emit_dst_register(emit, &tmp_dst_x);
7343 }
7344
7345 emit_src_register(emit, &inst->Src[0]);
7346 end_emit_instruction(emit);
7347
7348 emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
7349 &inst->Dst[0], &tmp_src_xxxx, NULL, NULL,
7350 inst->Instruction.Saturate,
7351 inst->Instruction.Precise);
7352
7353 free_temp_indexes(emit);
7354
7355 return TRUE;
7356 }
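
/*
 * Illustrative sketch of the sequence above for TGSI_OPCODE_SIN:
 *   SINCOS tmp.x, null, src     ; sine goes to the first destination
 *   MOV    dst, tmp.xxxx
 * For TGSI_OPCODE_COS the two destinations are swapped so the cosine
 * result lands in tmp.x instead.
 */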
7357
7358
7359 /**
7360 * Emit code for TGSI_OPCODE_SLE (Set Less than or Equal) instruction.
7361 */
7362 static boolean
7363 emit_sle(struct svga_shader_emitter_v10 *emit,
7364 const struct tgsi_full_instruction *inst)
7365 {
7366 /* dst = SLE(s0, s1):
7367 * dst = s0 <= s1 ? 1.0 : 0.0 (per component)
7368 * Translates into:
7369 * GE tmp, s1, s0; tmp = s1 >= s0 ? 0xffffffff : 0 (per comp)
7370 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component)
7371 */
7372 unsigned tmp = get_temp_index(emit);
7373 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7374 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7375 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
7376 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
7377
7378 /* GE tmp, s1, s0 */
7379 emit_instruction_op2(emit, VGPU10_OPCODE_GE, &tmp_dst, &inst->Src[1],
7380 &inst->Src[0]);
7381
7382 /* MOVC dst, tmp, one, zero */
7383 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
7384 &one, &zero);
7385
7386 free_temp_indexes(emit);
7387
7388 return TRUE;
7389 }
7390
7391
7392 /**
7393 * Emit code for TGSI_OPCODE_SLT (Set Less than) instruction.
7394 */
7395 static boolean
7396 emit_slt(struct svga_shader_emitter_v10 *emit,
7397 const struct tgsi_full_instruction *inst)
7398 {
7399 /* dst = SLT(s0, s1):
7400 * dst = s0 < s1 ? 1.0 : 0.0 (per component)
7401 * Translates into:
7402 * LT tmp, s0, s1; tmp = s0 < s1 ? 0xffffffff : 0 (per comp)
7403 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component)
7404 */
7405 unsigned tmp = get_temp_index(emit);
7406 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7407 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7408 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
7409 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
7410
7411 /* LT tmp, s0, s1 */
7412 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[0],
7413 &inst->Src[1]);
7414
7415 /* MOVC dst, tmp, one, zero */
7416 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
7417 &one, &zero);
7418
7419 free_temp_indexes(emit);
7420
7421 return TRUE;
7422 }
7423
7424
7425 /**
7426 * Emit code for TGSI_OPCODE_SNE (Set Not Equal) instruction.
7427 */
7428 static boolean
7429 emit_sne(struct svga_shader_emitter_v10 *emit,
7430 const struct tgsi_full_instruction *inst)
7431 {
7432 /* dst = SNE(s0, s1):
7433 * dst = s0 != s1 ? 1.0 : 0.0 (per component)
7434 * Translates into:
7435 * NE tmp, s0, s1; tmp = s0 != s1 ? 0xffffffff : 0 (per comp)
7436 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component)
7437 */
7438 unsigned tmp = get_temp_index(emit);
7439 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7440 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7441 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
7442 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
7443
7444 /* NE tmp, s0, s1 */
7445 emit_instruction_op2(emit, VGPU10_OPCODE_NE, &tmp_dst, &inst->Src[0],
7446 &inst->Src[1]);
7447
7448 /* MOVC dst, tmp, one, zero */
7449 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
7450 &one, &zero);
7451
7452 free_temp_indexes(emit);
7453
7454 return TRUE;
7455 }
7456
7457
7458 /**
7459 * Emit code for TGSI_OPCODE_SSG (Set Sign) instruction.
7460 */
7461 static boolean
7462 emit_ssg(struct svga_shader_emitter_v10 *emit,
7463 const struct tgsi_full_instruction *inst)
7464 {
7465 /* dst.x = (src.x > 0.0) ? 1.0 : (src.x < 0.0) ? -1.0 : 0.0
7466 * dst.y = (src.y > 0.0) ? 1.0 : (src.y < 0.0) ? -1.0 : 0.0
7467 * dst.z = (src.z > 0.0) ? 1.0 : (src.z < 0.0) ? -1.0 : 0.0
7468 * dst.w = (src.w > 0.0) ? 1.0 : (src.w < 0.0) ? -1.0 : 0.0
7469 * Translates into:
7470 * LT tmp1, src, zero; tmp1 = src < zero ? 0xffffffff : 0 (per comp)
7471 * MOVC tmp2, tmp1, -1.0, 0.0; tmp2 = tmp1 ? -1.0 : 0.0 (per component)
7472 * LT tmp1, zero, src; tmp1 = zero < src ? 0xffffffff : 0 (per comp)
7473 * MOVC dst, tmp1, 1.0, tmp2; dst = tmp1 ? 1.0 : tmp2 (per component)
7474 */
7475 struct tgsi_full_src_register zero =
7476 make_immediate_reg_float(emit, 0.0f);
7477 struct tgsi_full_src_register one =
7478 make_immediate_reg_float(emit, 1.0f);
7479 struct tgsi_full_src_register neg_one =
7480 make_immediate_reg_float(emit, -1.0f);
7481
7482 unsigned tmp1 = get_temp_index(emit);
7483 struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1);
7484 struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1);
7485
7486 unsigned tmp2 = get_temp_index(emit);
7487 struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2);
7488 struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2);
7489
7490 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp1_dst, &inst->Src[0],
7491 &zero);
7492 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &tmp2_dst, &tmp1_src,
7493 &neg_one, &zero);
7494 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp1_dst, &zero,
7495 &inst->Src[0]);
7496 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp1_src,
7497 &one, &tmp2_src);
7498
7499 free_temp_indexes(emit);
7500
7501 return TRUE;
7502 }
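
/*
 * Worked example (illustrative only): for src = (-3.0, 0.0, 5.0, 0.25)
 * the sequence above yields dst = (-1.0, 0.0, 1.0, 1.0).
 */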
7503
7504
7505 /**
7506 * Emit code for TGSI_OPCODE_ISSG (Integer Set Sign) instruction.
7507 */
7508 static boolean
7509 emit_issg(struct svga_shader_emitter_v10 *emit,
7510 const struct tgsi_full_instruction *inst)
7511 {
7512 /* dst.x = (src.x > 0) ? 1 : (src.x < 0) ? -1 : 0
7513 * dst.y = (src.y > 0) ? 1 : (src.y < 0) ? -1 : 0
7514 * dst.z = (src.z > 0) ? 1 : (src.z < 0) ? -1 : 0
7515 * dst.w = (src.w > 0) ? 1 : (src.w < 0) ? -1 : 0
7516 * Translates into:
7517 * ILT tmp1, src, 0 tmp1 = src < 0 ? -1 : 0 (per component)
7518 * ILT tmp2, 0, src tmp2 = 0 < src ? -1 : 0 (per component)
7519 * IADD dst, tmp1, neg(tmp2) dst = tmp1 - tmp2 (per component)
7520 */
7521 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
7522
7523 unsigned tmp1 = get_temp_index(emit);
7524 struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1);
7525 struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1);
7526
7527 unsigned tmp2 = get_temp_index(emit);
7528 struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2);
7529 struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2);
7530
7531 struct tgsi_full_src_register neg_tmp2 = negate_src(&tmp2_src);
7532
7533 emit_instruction_op2(emit, VGPU10_OPCODE_ILT, &tmp1_dst,
7534 &inst->Src[0], &zero);
7535 emit_instruction_op2(emit, VGPU10_OPCODE_ILT, &tmp2_dst,
7536 &zero, &inst->Src[0]);
7537 emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &inst->Dst[0],
7538 &tmp1_src, &neg_tmp2);
7539
7540 free_temp_indexes(emit);
7541
7542 return TRUE;
7543 }
7544
7545
7546 /**
7547 * Emit a comparison instruction. The dest register will get
7548 * 0 or ~0 values depending on the outcome of comparing src0 to src1.
7549 */
7550 static void
7551 emit_comparison(struct svga_shader_emitter_v10 *emit,
7552 SVGA3dCmpFunc func,
7553 const struct tgsi_full_dst_register *dst,
7554 const struct tgsi_full_src_register *src0,
7555 const struct tgsi_full_src_register *src1)
7556 {
7557 struct tgsi_full_src_register immediate;
7558 VGPU10OpcodeToken0 opcode0;
7559 boolean swapSrc = FALSE;
7560
7561 /* Sanity checks for svga vs. gallium enums */
7562 STATIC_ASSERT(SVGA3D_CMP_LESS == (PIPE_FUNC_LESS + 1));
7563 STATIC_ASSERT(SVGA3D_CMP_GREATEREQUAL == (PIPE_FUNC_GEQUAL + 1));
7564
7565 opcode0.value = 0;
7566
7567 switch (func) {
7568 case SVGA3D_CMP_NEVER:
7569 immediate = make_immediate_reg_int(emit, 0);
7570 /* MOV dst, {0} */
7571 begin_emit_instruction(emit);
7572 emit_dword(emit, VGPU10_OPCODE_MOV);
7573 emit_dst_register(emit, dst);
7574 emit_src_register(emit, &immediate);
7575 end_emit_instruction(emit);
7576 return;
7577 case SVGA3D_CMP_ALWAYS:
7578 immediate = make_immediate_reg_int(emit, -1);
7579 /* MOV dst, {-1} */
7580 begin_emit_instruction(emit);
7581 emit_dword(emit, VGPU10_OPCODE_MOV);
7582 emit_dst_register(emit, dst);
7583 emit_src_register(emit, &immediate);
7584 end_emit_instruction(emit);
7585 return;
7586 case SVGA3D_CMP_LESS:
7587 opcode0.opcodeType = VGPU10_OPCODE_LT;
7588 break;
7589 case SVGA3D_CMP_EQUAL:
7590 opcode0.opcodeType = VGPU10_OPCODE_EQ;
7591 break;
7592 case SVGA3D_CMP_LESSEQUAL:
7593 opcode0.opcodeType = VGPU10_OPCODE_GE;
7594 swapSrc = TRUE;
7595 break;
7596 case SVGA3D_CMP_GREATER:
7597 opcode0.opcodeType = VGPU10_OPCODE_LT;
7598 swapSrc = TRUE;
7599 break;
7600 case SVGA3D_CMP_NOTEQUAL:
7601 opcode0.opcodeType = VGPU10_OPCODE_NE;
7602 break;
7603 case SVGA3D_CMP_GREATEREQUAL:
7604 opcode0.opcodeType = VGPU10_OPCODE_GE;
7605 break;
7606 default:
7607 assert(!"Unexpected comparison mode");
7608 opcode0.opcodeType = VGPU10_OPCODE_EQ;
7609 }
7610
7611 begin_emit_instruction(emit);
7612 emit_dword(emit, opcode0.value);
7613 emit_dst_register(emit, dst);
7614 if (swapSrc) {
7615 emit_src_register(emit, src1);
7616 emit_src_register(emit, src0);
7617 }
7618 else {
7619 emit_src_register(emit, src0);
7620 emit_src_register(emit, src1);
7621 }
7622 end_emit_instruction(emit);
7623 }
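
/*
 * Illustrative examples of the mapping above: SVGA3D_CMP_LESSEQUAL(a, b)
 * is emitted as GE dst, b, a (operands swapped), SVGA3D_CMP_GREATER(a, b)
 * as LT dst, b, a, and SVGA3D_CMP_NEVER simply becomes MOV dst, {0}.
 */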
7624
7625
7626 /**
7627 * Get texel/address offsets for a texture instruction.
7628 */
7629 static void
7630 get_texel_offsets(const struct svga_shader_emitter_v10 *emit,
7631 const struct tgsi_full_instruction *inst, int offsets[3])
7632 {
7633 if (inst->Texture.NumOffsets == 1) {
7634 /* According to the OpenGL Shading Language spec, the offsets can only
7635 * come from a previously-declared immediate/literal.
7636 */
7637 const struct tgsi_texture_offset *off = inst->TexOffsets;
7638 const unsigned index = off[0].Index;
7639 const unsigned swizzleX = off[0].SwizzleX;
7640 const unsigned swizzleY = off[0].SwizzleY;
7641 const unsigned swizzleZ = off[0].SwizzleZ;
7642 const union tgsi_immediate_data *imm = emit->immediates[index];
7643
7644 assert(inst->TexOffsets[0].File == TGSI_FILE_IMMEDIATE);
7645
7646 offsets[0] = imm[swizzleX].Int;
7647 offsets[1] = imm[swizzleY].Int;
7648 offsets[2] = imm[swizzleZ].Int;
7649 }
7650 else {
7651 offsets[0] = offsets[1] = offsets[2] = 0;
7652 }
7653 }
7654
7655
7656 /**
7657 * Set up the coordinate register for texture sampling.
7658 * When we're sampling from a RECT texture we have to scale the
7659 * unnormalized coordinate to a normalized coordinate.
7660 * We do that by multiplying the coordinate by an "extra" constant.
7661 * An alternative would be to use the RESINFO instruction to query the
7662 * texture's size.
7663 */
7664 static struct tgsi_full_src_register
7665 setup_texcoord(struct svga_shader_emitter_v10 *emit,
7666 unsigned unit,
7667 const struct tgsi_full_src_register *coord)
7668 {
7669 if (emit->sampler_view[unit] && emit->key.tex[unit].unnormalized) {
7670 unsigned scale_index = emit->texcoord_scale_index[unit];
7671 unsigned tmp = get_temp_index(emit);
7672 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7673 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7674 struct tgsi_full_src_register scale_src = make_src_const_reg(scale_index);
7675
7676 if (emit->key.tex[unit].texel_bias) {
7677 /* To work around a texture coordinate rounding issue, a 0.0001
7678 * offset is added. This fixes the piglit test fbo-blit-scaled-linear. */
7679 struct tgsi_full_src_register offset =
7680 make_immediate_reg_float(emit, 0.0001f);
7681
7682 /* ADD tmp, coord, offset */
7683 emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp_dst,
7684 coord, &offset);
7685 /* MUL tmp, tmp, scale */
7686 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_dst,
7687 &tmp_src, &scale_src);
7688 }
7689 else {
7690 /* MUL tmp, coord, const[] */
7691 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_dst,
7692 coord, &scale_src);
7693 }
7694 return tmp_src;
7695 }
7696 else {
7697 /* use texcoord as-is */
7698 return *coord;
7699 }
7700 }
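
/*
 * Illustrative sketch of the unnormalized-coordinate path above with
 * texel_bias enabled:
 *   ADD tmp, coord, {0.0001}
 *   MUL tmp, tmp, const[scale_index]   ; per-unit scale constant
 * and the temp register is then used as the texture coordinate.
 */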
7701
7702
7703 /**
7704 * For SAMPLE_C instructions, emit the extra src register which indicates
7705 * the reference/comparison value.
7706 */
7707 static void
7708 emit_tex_compare_refcoord(struct svga_shader_emitter_v10 *emit,
7709 enum tgsi_texture_type target,
7710 const struct tgsi_full_src_register *coord)
7711 {
7712 struct tgsi_full_src_register coord_src_ref;
7713 int component;
7714
7715 assert(tgsi_is_shadow_target(target));
7716
7717 component = tgsi_util_get_shadow_ref_src_index(target) % 4;
7718 assert(component >= 0);
7719
7720 coord_src_ref = scalar_src(coord, component);
7721
7722 emit_src_register(emit, &coord_src_ref);
7723 }
7724
7725
7726 /**
7727 * Info for implementing texture swizzles.
7728 * The begin_tex_swizzle(), get_tex_swizzle_dst() and end_tex_swizzle()
7729 * functions use this to encapsulate the extra steps needed to perform
7730 * a texture swizzle, or shadow/depth comparisons.
7731 * The shadow/depth comparison is only done here for the cases where
7732 * there's no VGPU10 opcode (like texture bias lookup w/ shadow compare).
7733 */
7734 struct tex_swizzle_info
7735 {
7736 boolean swizzled;
7737 boolean shadow_compare;
7738 unsigned unit;
7739 enum tgsi_texture_type texture_target; /**< TGSI_TEXTURE_x */
7740 struct tgsi_full_src_register tmp_src;
7741 struct tgsi_full_dst_register tmp_dst;
7742 const struct tgsi_full_dst_register *inst_dst;
7743 const struct tgsi_full_src_register *coord_src;
7744 };
7745
7746
7747 /**
7748 * Do setup for handling texture swizzles or shadow compares.
7749 * \param unit the texture unit
7750 * \param inst the TGSI texture instruction
7751 * \param shadow_compare do shadow/depth comparison?
7752 * \param swz returns the swizzle info
7753 */
7754 static void
7755 begin_tex_swizzle(struct svga_shader_emitter_v10 *emit,
7756 unsigned unit,
7757 const struct tgsi_full_instruction *inst,
7758 boolean shadow_compare,
7759 struct tex_swizzle_info *swz)
7760 {
7761 swz->swizzled = (emit->key.tex[unit].swizzle_r != TGSI_SWIZZLE_X ||
7762 emit->key.tex[unit].swizzle_g != TGSI_SWIZZLE_Y ||
7763 emit->key.tex[unit].swizzle_b != TGSI_SWIZZLE_Z ||
7764 emit->key.tex[unit].swizzle_a != TGSI_SWIZZLE_W);
7765
7766 swz->shadow_compare = shadow_compare;
7767 swz->texture_target = inst->Texture.Texture;
7768
7769 if (swz->swizzled || shadow_compare) {
7770 /* Allocate temp register for the result of the SAMPLE instruction
7771 * and the source of the MOV/compare/swizzle instructions.
7772 */
7773 unsigned tmp = get_temp_index(emit);
7774 swz->tmp_src = make_src_temp_reg(tmp);
7775 swz->tmp_dst = make_dst_temp_reg(tmp);
7776
7777 swz->unit = unit;
7778 }
7779 swz->inst_dst = &inst->Dst[0];
7780 swz->coord_src = &inst->Src[0];
7781
7782 emit->fs.shadow_compare_units |= shadow_compare << unit;
7783 }
7784
7785
7786 /**
7787 * Returns the register to put the SAMPLE instruction results into.
7788 * This will either be the original instruction dst reg (if no swizzle
7789 * and no shadow comparison) or a temporary reg otherwise.
7790 */
7791 static const struct tgsi_full_dst_register *
7792 get_tex_swizzle_dst(const struct tex_swizzle_info *swz)
7793 {
7794 return (swz->swizzled || swz->shadow_compare)
7795 ? &swz->tmp_dst : swz->inst_dst;
7796 }
7797
7798
7799 /**
7800 * This emits the MOV instruction that actually implements a texture swizzle
7801 * and/or shadow comparison.
7802 */
7803 static void
7804 end_tex_swizzle(struct svga_shader_emitter_v10 *emit,
7805 const struct tex_swizzle_info *swz)
7806 {
7807 if (swz->shadow_compare) {
7808 /* Emit extra instructions to compare the fetched texel value against
7809 * a texture coordinate component. The result of the comparison
7810 * is 0.0 or 1.0.
7811 */
7812 struct tgsi_full_src_register coord_src;
7813 struct tgsi_full_src_register texel_src =
7814 scalar_src(&swz->tmp_src, TGSI_SWIZZLE_X);
7815 struct tgsi_full_src_register one =
7816 make_immediate_reg_float(emit, 1.0f);
7817 /* convert gallium comparison func to SVGA comparison func */
7818 SVGA3dCmpFunc compare_func = emit->key.tex[swz->unit].compare_func + 1;
7819
7820 int component =
7821 tgsi_util_get_shadow_ref_src_index(swz->texture_target) % 4;
7822 assert(component >= 0);
7823 coord_src = scalar_src(swz->coord_src, component);
7824
7825 /* COMPARE tmp, coord, texel */
7826 emit_comparison(emit, compare_func,
7827 &swz->tmp_dst, &coord_src, &texel_src);
7828
7829 /* AND dest, tmp, {1.0} */
7830 begin_emit_instruction(emit);
7831 emit_opcode(emit, VGPU10_OPCODE_AND, FALSE);
7832 if (swz->swizzled) {
7833 emit_dst_register(emit, &swz->tmp_dst);
7834 }
7835 else {
7836 emit_dst_register(emit, swz->inst_dst);
7837 }
7838 emit_src_register(emit, &swz->tmp_src);
7839 emit_src_register(emit, &one);
7840 end_emit_instruction(emit);
7841 }
7842
7843 if (swz->swizzled) {
7844 unsigned swz_r = emit->key.tex[swz->unit].swizzle_r;
7845 unsigned swz_g = emit->key.tex[swz->unit].swizzle_g;
7846 unsigned swz_b = emit->key.tex[swz->unit].swizzle_b;
7847 unsigned swz_a = emit->key.tex[swz->unit].swizzle_a;
7848 unsigned writemask_0 = 0, writemask_1 = 0;
7849 boolean int_tex = is_integer_type(emit->sampler_return_type[swz->unit]);
7850
7851 /* Swizzle w/out zero/one terms */
7852 struct tgsi_full_src_register src_swizzled =
7853 swizzle_src(&swz->tmp_src,
7854 swz_r < PIPE_SWIZZLE_0 ? swz_r : PIPE_SWIZZLE_X,
7855 swz_g < PIPE_SWIZZLE_0 ? swz_g : PIPE_SWIZZLE_Y,
7856 swz_b < PIPE_SWIZZLE_0 ? swz_b : PIPE_SWIZZLE_Z,
7857 swz_a < PIPE_SWIZZLE_0 ? swz_a : PIPE_SWIZZLE_W);
7858
7859 /* MOV dst, color(tmp).<swizzle> */
7860 emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
7861 swz->inst_dst, &src_swizzled);
7862
7863 /* handle swizzle zero terms */
7864 writemask_0 = (((swz_r == PIPE_SWIZZLE_0) << 0) |
7865 ((swz_g == PIPE_SWIZZLE_0) << 1) |
7866 ((swz_b == PIPE_SWIZZLE_0) << 2) |
7867 ((swz_a == PIPE_SWIZZLE_0) << 3));
7868 writemask_0 &= swz->inst_dst->Register.WriteMask;
7869
7870 if (writemask_0) {
7871 struct tgsi_full_src_register zero = int_tex ?
7872 make_immediate_reg_int(emit, 0) :
7873 make_immediate_reg_float(emit, 0.0f);
7874 struct tgsi_full_dst_register dst =
7875 writemask_dst(swz->inst_dst, writemask_0);
7876
7877 /* MOV dst.writemask_0, {0,0,0,0} */
7878 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &zero);
7879 }
7880
7881 /* handle swizzle one terms */
7882 writemask_1 = (((swz_r == PIPE_SWIZZLE_1) << 0) |
7883 ((swz_g == PIPE_SWIZZLE_1) << 1) |
7884 ((swz_b == PIPE_SWIZZLE_1) << 2) |
7885 ((swz_a == PIPE_SWIZZLE_1) << 3));
7886 writemask_1 &= swz->inst_dst->Register.WriteMask;
7887
7888 if (writemask_1) {
7889 struct tgsi_full_src_register one = int_tex ?
7890 make_immediate_reg_int(emit, 1) :
7891 make_immediate_reg_float(emit, 1.0f);
7892 struct tgsi_full_dst_register dst =
7893 writemask_dst(swz->inst_dst, writemask_1);
7894
7895 /* MOV dst.writemask_1, {1,1,1,1} */
7896 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &one);
7897 }
7898 }
7899 }
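
/*
 * Illustrative example of the swizzle handling above: for a view swizzle
 * of (Z, Y, X, 1) on a float texture the emitted code is
 *   MOV dst, tmp.zyxw              ; zero/one terms map to a dummy comp
 *   MOV dst.w, {1.0, 1.0, 1.0, 1.0}
 * with no extra instruction needed since there are no zero terms here.
 */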
7900
7901
7902 /**
7903 * Emit code for TGSI_OPCODE_SAMPLE instruction.
7904 */
7905 static boolean
7906 emit_sample(struct svga_shader_emitter_v10 *emit,
7907 const struct tgsi_full_instruction *inst)
7908 {
7909 const unsigned resource_unit = inst->Src[1].Register.Index;
7910 const unsigned sampler_unit = inst->Src[2].Register.Index;
7911 struct tgsi_full_src_register coord;
7912 int offsets[3];
7913 struct tex_swizzle_info swz_info;
7914
7915 begin_tex_swizzle(emit, sampler_unit, inst, FALSE, &swz_info);
7916
7917 get_texel_offsets(emit, inst, offsets);
7918
7919 coord = setup_texcoord(emit, resource_unit, &inst->Src[0]);
7920
7921 /* SAMPLE dst, coord(s0), resource, sampler */
7922 begin_emit_instruction(emit);
7923
7924 /* NOTE: for non-fragment shaders, we should use VGPU10_OPCODE_SAMPLE_L
7925 * with LOD=0. But our virtual GPU accepts this as-is.
7926 */
7927 emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE,
7928 inst->Instruction.Saturate, offsets);
7929 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
7930 emit_src_register(emit, &coord);
7931 emit_resource_register(emit, resource_unit);
7932 emit_sampler_register(emit, sampler_unit);
7933 end_emit_instruction(emit);
7934
7935 end_tex_swizzle(emit, &swz_info);
7936
7937 free_temp_indexes(emit);
7938
7939 return TRUE;
7940 }
7941
7942
7943 /**
7944 * Check if a texture instruction is valid.
7945 * An example of an invalid texture instruction is doing shadow comparison
7946 * with an integer-valued texture.
7947 * If we detect an invalid texture instruction, we replace it with:
7948 * MOV dst, {1,1,1,1};
7949 * \return TRUE if valid, FALSE if invalid.
7950 */
7951 static boolean
7952 is_valid_tex_instruction(struct svga_shader_emitter_v10 *emit,
7953 const struct tgsi_full_instruction *inst)
7954 {
7955 const unsigned unit = inst->Src[1].Register.Index;
7956 const enum tgsi_texture_type target = inst->Texture.Texture;
7957 boolean valid = TRUE;
7958
7959 if (tgsi_is_shadow_target(target) &&
7960 is_integer_type(emit->sampler_return_type[unit])) {
7961 debug_printf("Invalid SAMPLE_C with an integer texture!\n");
7962 valid = FALSE;
7963 }
7964 /* XXX we might check for other conditions here in the future */
7965
7966 if (!valid) {
7967 /* emit a MOV dst, {1,1,1,1} instruction. */
7968 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
7969 begin_emit_instruction(emit);
7970 emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE);
7971 emit_dst_register(emit, &inst->Dst[0]);
7972 emit_src_register(emit, &one);
7973 end_emit_instruction(emit);
7974 }
7975
7976 return valid;
7977 }
7978
7979
7980 /**
7981 * Emit code for TGSI_OPCODE_TEX (simple texture lookup)
7982 */
7983 static boolean
7984 emit_tex(struct svga_shader_emitter_v10 *emit,
7985 const struct tgsi_full_instruction *inst)
7986 {
7987 const uint unit = inst->Src[1].Register.Index;
7988 const enum tgsi_texture_type target = inst->Texture.Texture;
7989 VGPU10_OPCODE_TYPE opcode;
7990 struct tgsi_full_src_register coord;
7991 int offsets[3];
7992 struct tex_swizzle_info swz_info;
7993
7994 /* check that the sampler returns a float */
7995 if (!is_valid_tex_instruction(emit, inst))
7996 return TRUE;
7997
7998 begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info);
7999
8000 get_texel_offsets(emit, inst, offsets);
8001
8002 coord = setup_texcoord(emit, unit, &inst->Src[0]);
8003
8004 /* SAMPLE dst, coord(s0), resource, sampler */
8005 begin_emit_instruction(emit);
8006
8007 if (tgsi_is_shadow_target(target))
8008 opcode = VGPU10_OPCODE_SAMPLE_C;
8009 else
8010 opcode = VGPU10_OPCODE_SAMPLE;
8011
8012 emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets);
8013 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
8014 emit_src_register(emit, &coord);
8015 emit_resource_register(emit, unit);
8016 emit_sampler_register(emit, unit);
8017 if (opcode == VGPU10_OPCODE_SAMPLE_C) {
8018 emit_tex_compare_refcoord(emit, target, &coord);
8019 }
8020 end_emit_instruction(emit);
8021
8022 end_tex_swizzle(emit, &swz_info);
8023
8024 free_temp_indexes(emit);
8025
8026 return TRUE;
8027 }
8028
8029 /**
8030 * Emit code for TGSI_OPCODE_TG4 (texture lookup for texture gather)
8031 */
8032 static boolean
8033 emit_tg4(struct svga_shader_emitter_v10 *emit,
8034 const struct tgsi_full_instruction *inst)
8035 {
8036 const uint unit = inst->Src[2].Register.Index;
8037 struct tgsi_full_src_register src;
8038 struct tgsi_full_src_register offset_src, sampler, ref;
8039 int offsets[3];
8040
8041 /* check that the sampler returns a float */
8042 if (!is_valid_tex_instruction(emit, inst))
8043 return TRUE;
8044
8045 if (emit->version >= 50) {
8046 unsigned target = inst->Texture.Texture;
8047 int index = inst->Src[1].Register.Index;
8048 const union tgsi_immediate_data *imm = emit->immediates[index];
8049 int select_comp = imm[inst->Src[1].Register.SwizzleX].Int;
8050 unsigned select_swizzle = PIPE_SWIZZLE_X;
8051
8052 if (!tgsi_is_shadow_target(target)) {
8053 switch (select_comp) {
8054 case 0:
8055 select_swizzle = emit->key.tex[unit].swizzle_r;
8056 break;
8057 case 1:
8058 select_swizzle = emit->key.tex[unit].swizzle_g;
8059 break;
8060 case 2:
8061 select_swizzle = emit->key.tex[unit].swizzle_b;
8062 break;
8063 case 3:
8064 select_swizzle = emit->key.tex[unit].swizzle_a;
8065 break;
8066 default:
8067 assert(!"Unexpected component in texture gather swizzle");
8068 }
8069 }
8070 else {
8071 select_swizzle = emit->key.tex[unit].swizzle_r;
8072 }
8073
8074 if (select_swizzle == PIPE_SWIZZLE_1) {
8075 src = make_immediate_reg_float(emit, 1.0);
8076 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &src);
8077 return TRUE;
8078 }
8079 else if (select_swizzle == PIPE_SWIZZLE_0) {
8080 src = make_immediate_reg_float(emit, 0.0);
8081 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &src);
8082 return TRUE;
8083 }
8084
8085 src = setup_texcoord(emit, unit, &inst->Src[0]);
8086
8087 /* GATHER4 dst, coord, resource, sampler */
8088 /* GATHER4_C dst, coord, resource, sampler, ref */
8089 /* GATHER4_PO dst, coord, offset, resource, sampler */
8090 /* GATHER4_PO_C dst, coord, offset, resource, sampler, ref */
8091 begin_emit_instruction(emit);
8092 if (inst->Texture.NumOffsets == 1) {
8093 if (tgsi_is_shadow_target(target)) {
8094 emit_opcode(emit, VGPU10_OPCODE_GATHER4_PO_C,
8095 inst->Instruction.Saturate);
8096 }
8097 else {
8098 emit_opcode(emit, VGPU10_OPCODE_GATHER4_PO,
8099 inst->Instruction.Saturate);
8100 }
8101 }
8102 else {
8103 if (tgsi_is_shadow_target(target)) {
8104 emit_opcode(emit, VGPU10_OPCODE_GATHER4_C,
8105 inst->Instruction.Saturate);
8106 }
8107 else {
8108 emit_opcode(emit, VGPU10_OPCODE_GATHER4,
8109 inst->Instruction.Saturate);
8110 }
8111 }
8112
8113 emit_dst_register(emit, &inst->Dst[0]);
8114 emit_src_register(emit, &src);
8115 if (inst->Texture.NumOffsets == 1) {
8116 /* offset */
8117 offset_src = make_src_reg(inst->TexOffsets[0].File,
8118 inst->TexOffsets[0].Index);
8119 offset_src = swizzle_src(&offset_src, inst->TexOffsets[0].SwizzleX,
8120 inst->TexOffsets[0].SwizzleY,
8121 inst->TexOffsets[0].SwizzleZ,
8122 TGSI_SWIZZLE_W);
8123 emit_src_register(emit, &offset_src);
8124 }
8125
8126 /* resource */
8127 emit_resource_register(emit, unit);
8128
8129 /* sampler */
8130 sampler = make_src_reg(TGSI_FILE_SAMPLER, unit);
8131 sampler.Register.SwizzleX =
8132 sampler.Register.SwizzleY =
8133 sampler.Register.SwizzleZ =
8134 sampler.Register.SwizzleW = select_swizzle;
8135 emit_src_register(emit, &sampler);
8136
8137 if (tgsi_is_shadow_target(target)) {
8138 /* ref */
8139 if (target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
8140 ref = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
8141 emit_tex_compare_refcoord(emit, target, &ref);
8142 }
8143 else {
8144 emit_tex_compare_refcoord(emit, target, &src);
8145 }
8146 }
8147
8148 end_emit_instruction(emit);
8149 free_temp_indexes(emit);
8150 }
8151 else {
8152 /* Only a single channel is supported in SM4_1 and we report
8153 * PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS = 1.
8154 * Only the 0th component will be gathered.
8155 */
8156 switch (emit->key.tex[unit].swizzle_r) {
8157 case PIPE_SWIZZLE_X:
8158 get_texel_offsets(emit, inst, offsets);
8159 src = setup_texcoord(emit, unit, &inst->Src[0]);
8160
8161 /* Gather dst, coord, resource, sampler */
8162 begin_emit_instruction(emit);
8163 emit_sample_opcode(emit, VGPU10_OPCODE_GATHER4,
8164 inst->Instruction.Saturate, offsets);
8165 emit_dst_register(emit, &inst->Dst[0]);
8166 emit_src_register(emit, &src);
8167 emit_resource_register(emit, unit);
8168
8169 /* sampler */
8170 sampler = make_src_reg(TGSI_FILE_SAMPLER, unit);
8171 sampler.Register.SwizzleX =
8172 sampler.Register.SwizzleY =
8173 sampler.Register.SwizzleZ =
8174 sampler.Register.SwizzleW = PIPE_SWIZZLE_X;
8175 emit_src_register(emit, &sampler);
8176
8177 end_emit_instruction(emit);
8178 break;
8179 case PIPE_SWIZZLE_W:
8180 case PIPE_SWIZZLE_1:
8181 src = make_immediate_reg_float(emit, 1.0);
8182 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &src);
8183 break;
8184 case PIPE_SWIZZLE_Y:
8185 case PIPE_SWIZZLE_Z:
8186 case PIPE_SWIZZLE_0:
8187 default:
8188 src = make_immediate_reg_float(emit, 0.0);
8189 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &src);
8190 break;
8191 }
8192 }
8193
8194 return TRUE;
8195 }
8196
8197
8198
8199 /**
8200 * Emit code for TGSI_OPCODE_TEX2 (texture lookup for shadow cube map arrays)
8201 */
8202 static boolean
8203 emit_tex2(struct svga_shader_emitter_v10 *emit,
8204 const struct tgsi_full_instruction *inst)
8205 {
8206 const uint unit = inst->Src[2].Register.Index;
8207 unsigned target = inst->Texture.Texture;
8208 struct tgsi_full_src_register coord, ref;
8209 int offsets[3];
8210 struct tex_swizzle_info swz_info;
8211
8212 /* check that the sampler returns a float */
8213 if (!is_valid_tex_instruction(emit, inst))
8214 return TRUE;
8215
8216 begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info);
8217
8218 get_texel_offsets(emit, inst, offsets);
8219
8220 coord = setup_texcoord(emit, unit, &inst->Src[0]);
8221 ref = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
8222
8223 /* SAMPLE_C dst, coord, resource, sampler, ref */
8224 begin_emit_instruction(emit);
8225 emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE_C,
8226 inst->Instruction.Saturate, offsets);
8227 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
8228 emit_src_register(emit, &coord);
8229 emit_resource_register(emit, unit);
8230 emit_sampler_register(emit, unit);
8231 emit_tex_compare_refcoord(emit, target, &ref);
8232 end_emit_instruction(emit);
8233
8234 end_tex_swizzle(emit, &swz_info);
8235
8236 free_temp_indexes(emit);
8237
8238 return TRUE;
8239 }
8240
8241
8242 /**
8243 * Emit code for TGSI_OPCODE_TXP (projective texture)
8244 */
8245 static boolean
8246 emit_txp(struct svga_shader_emitter_v10 *emit,
8247 const struct tgsi_full_instruction *inst)
8248 {
8249 const uint unit = inst->Src[1].Register.Index;
8250 const enum tgsi_texture_type target = inst->Texture.Texture;
8251 VGPU10_OPCODE_TYPE opcode;
8252 int offsets[3];
8253 unsigned tmp = get_temp_index(emit);
8254 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
8255 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
8256 struct tgsi_full_src_register src0_wwww =
8257 scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
8258 struct tgsi_full_src_register coord;
8259 struct tex_swizzle_info swz_info;
8260
8261 /* check that the sampler returns a float */
8262 if (!is_valid_tex_instruction(emit, inst))
8263 return TRUE;
8264
8265 begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info);
8266
8267 get_texel_offsets(emit, inst, offsets);
8268
8269 coord = setup_texcoord(emit, unit, &inst->Src[0]);
8270
8271 /* DIV tmp, coord, coord.wwww */
8272 emit_instruction_op2(emit, VGPU10_OPCODE_DIV, &tmp_dst,
8273 &coord, &src0_wwww);
8274
8275 /* SAMPLE dst, coord(tmp), resource, sampler */
8276 begin_emit_instruction(emit);
8277
8278 if (tgsi_is_shadow_target(target))
8279 /* NOTE: for non-fragment shaders, we should use
8280 * VGPU10_OPCODE_SAMPLE_C_LZ, but our virtual GPU accepts this as-is.
8281 */
8282 opcode = VGPU10_OPCODE_SAMPLE_C;
8283 else
8284 opcode = VGPU10_OPCODE_SAMPLE;
8285
8286 emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets);
8287 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
8288 emit_src_register(emit, &tmp_src); /* projected coord */
8289 emit_resource_register(emit, unit);
8290 emit_sampler_register(emit, unit);
8291 if (opcode == VGPU10_OPCODE_SAMPLE_C) {
8292 emit_tex_compare_refcoord(emit, target, &tmp_src);
8293 }
8294 end_emit_instruction(emit);
8295
8296 end_tex_swizzle(emit, &swz_info);
8297
8298 free_temp_indexes(emit);
8299
8300 return TRUE;
8301 }
8302
8303
8304 /**
8305 * Emit code for TGSI_OPCODE_TXD (explicit derivatives)
8306 */
8307 static boolean
8308 emit_txd(struct svga_shader_emitter_v10 *emit,
8309 const struct tgsi_full_instruction *inst)
8310 {
8311 const uint unit = inst->Src[3].Register.Index;
8312 const enum tgsi_texture_type target = inst->Texture.Texture;
8313 int offsets[3];
8314 struct tgsi_full_src_register coord;
8315 struct tex_swizzle_info swz_info;
8316
8317 begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target),
8318 &swz_info);
8319
8320 get_texel_offsets(emit, inst, offsets);
8321
8322 coord = setup_texcoord(emit, unit, &inst->Src[0]);
8323
8324 /* SAMPLE_D dst, coord(s0), resource, sampler, Xderiv(s1), Yderiv(s2) */
8325 begin_emit_instruction(emit);
8326 emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE_D,
8327 inst->Instruction.Saturate, offsets);
8328 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
8329 emit_src_register(emit, &coord);
8330 emit_resource_register(emit, unit);
8331 emit_sampler_register(emit, unit);
8332 emit_src_register(emit, &inst->Src[1]); /* Xderiv */
8333 emit_src_register(emit, &inst->Src[2]); /* Yderiv */
8334 end_emit_instruction(emit);
8335
8336 end_tex_swizzle(emit, &swz_info);
8337
8338 free_temp_indexes(emit);
8339
8340 return TRUE;
8341 }
8342
8343
8344 /**
8345 * Emit code for TGSI_OPCODE_TXF (texel fetch)
8346 */
8347 static boolean
8348 emit_txf(struct svga_shader_emitter_v10 *emit,
8349 const struct tgsi_full_instruction *inst)
8350 {
8351 const uint unit = inst->Src[1].Register.Index;
8352 const boolean msaa = tgsi_is_msaa_target(inst->Texture.Texture)
8353 && emit->key.tex[unit].num_samples > 1;
8354 int offsets[3];
8355 struct tex_swizzle_info swz_info;
8356
8357 begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info);
8358
8359 get_texel_offsets(emit, inst, offsets);
8360
8361 if (msaa) {
8362 assert(emit->key.tex[unit].num_samples > 1);
8363
8364 /* Fetch one sample from an MSAA texture */
8365 struct tgsi_full_src_register sampleIndex =
8366 scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
8367 /* LD_MS dst, coord(s0), resource, sampleIndex */
8368 begin_emit_instruction(emit);
8369 emit_sample_opcode(emit, VGPU10_OPCODE_LD_MS,
8370 inst->Instruction.Saturate, offsets);
8371 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
8372 emit_src_register(emit, &inst->Src[0]);
8373 emit_resource_register(emit, unit);
8374 emit_src_register(emit, &sampleIndex);
8375 end_emit_instruction(emit);
8376 }
8377 else {
8378 /* Fetch one texel specified by integer coordinate */
8379 /* LD dst, coord(s0), resource */
8380 begin_emit_instruction(emit);
8381 emit_sample_opcode(emit, VGPU10_OPCODE_LD,
8382 inst->Instruction.Saturate, offsets);
8383 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
8384 emit_src_register(emit, &inst->Src[0]);
8385 emit_resource_register(emit, unit);
8386 end_emit_instruction(emit);
8387 }
8388
8389 end_tex_swizzle(emit, &swz_info);
8390
8391 free_temp_indexes(emit);
8392
8393 return TRUE;
8394 }
8395
8396
8397 /**
8398 * Emit code for TGSI_OPCODE_TXL (explicit LOD) or TGSI_OPCODE_TXB (LOD bias)
8399 * or TGSI_OPCODE_TXB2 (for cube shadow maps).
8400 */
8401 static boolean
8402 emit_txl_txb(struct svga_shader_emitter_v10 *emit,
8403 const struct tgsi_full_instruction *inst)
8404 {
8405 const enum tgsi_texture_type target = inst->Texture.Texture;
8406 VGPU10_OPCODE_TYPE opcode;
8407 unsigned unit;
8408 int offsets[3];
8409 struct tgsi_full_src_register coord, lod_bias;
8410 struct tex_swizzle_info swz_info;
8411
8412 assert(inst->Instruction.Opcode == TGSI_OPCODE_TXL ||
8413 inst->Instruction.Opcode == TGSI_OPCODE_TXB ||
8414 inst->Instruction.Opcode == TGSI_OPCODE_TXB2);
8415
8416 if (inst->Instruction.Opcode == TGSI_OPCODE_TXB2) {
8417 lod_bias = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
8418 unit = inst->Src[2].Register.Index;
8419 }
8420 else {
8421 lod_bias = scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
8422 unit = inst->Src[1].Register.Index;
8423 }
8424
8425 begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target),
8426 &swz_info);
8427
8428 get_texel_offsets(emit, inst, offsets);
8429
8430 coord = setup_texcoord(emit, unit, &inst->Src[0]);
8431
8432 /* SAMPLE_L/B dst, coord(s0), resource, sampler, lod(s3) */
8433 begin_emit_instruction(emit);
8434 if (inst->Instruction.Opcode == TGSI_OPCODE_TXL) {
8435 opcode = VGPU10_OPCODE_SAMPLE_L;
8436 }
8437 else {
8438 opcode = VGPU10_OPCODE_SAMPLE_B;
8439 }
8440 emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets);
8441 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
8442 emit_src_register(emit, &coord);
8443 emit_resource_register(emit, unit);
8444 emit_sampler_register(emit, unit);
8445 emit_src_register(emit, &lod_bias);
8446 end_emit_instruction(emit);
8447
8448 end_tex_swizzle(emit, &swz_info);
8449
8450 free_temp_indexes(emit);
8451
8452 return TRUE;
8453 }
8454
8455
8456 /**
8457 * Emit code for TGSI_OPCODE_TXL2 (explicit LOD) for cube map arrays.
8458 */
8459 static boolean
8460 emit_txl2(struct svga_shader_emitter_v10 *emit,
8461 const struct tgsi_full_instruction *inst)
8462 {
8463 unsigned target = inst->Texture.Texture;
8464 unsigned opcode, unit;
8465 int offsets[3];
8466 struct tgsi_full_src_register coord, lod;
8467 struct tex_swizzle_info swz_info;
8468
8469 assert(inst->Instruction.Opcode == TGSI_OPCODE_TXL2);
8470
8471 lod = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
8472 unit = inst->Src[2].Register.Index;
8473
8474 begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target),
8475 &swz_info);
8476
8477 get_texel_offsets(emit, inst, offsets);
8478
8479 coord = setup_texcoord(emit, unit, &inst->Src[0]);
8480
8481 /* SAMPLE_L dst, coord(s0), resource, sampler, lod(s3) */
8482 begin_emit_instruction(emit);
8483 opcode = VGPU10_OPCODE_SAMPLE_L;
8484 emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets);
8485 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
8486 emit_src_register(emit, &coord);
8487 emit_resource_register(emit, unit);
8488 emit_sampler_register(emit, unit);
8489 emit_src_register(emit, &lod);
8490 end_emit_instruction(emit);
8491
8492 end_tex_swizzle(emit, &swz_info);
8493
8494 free_temp_indexes(emit);
8495
8496 return TRUE;
8497 }
8498
8499
8500 /**
8501 * Emit code for TGSI_OPCODE_TXQ (texture query) instruction.
8502 */
8503 static boolean
8504 emit_txq(struct svga_shader_emitter_v10 *emit,
8505 const struct tgsi_full_instruction *inst)
8506 {
8507 const uint unit = inst->Src[1].Register.Index;
8508
8509 if (emit->sampler_target[unit] == TGSI_TEXTURE_BUFFER) {
8510 /* RESINFO does not support querying texture buffers, so we instead
8511 * store texture buffer sizes in shader constants and copy the size to
8512 * the destination register to implement TXQ.
8513 * MOV dst, const[texture_buffer_size_index[unit]]
8514 */
8515 struct tgsi_full_src_register size_src =
8516 make_src_const_reg(emit->texture_buffer_size_index[unit]);
8517 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &size_src);
8518 } else {
8519 /* RESINFO dst, srcMipLevel, resource */
8520 begin_emit_instruction(emit);
8521 emit_opcode_resinfo(emit, VGPU10_RESINFO_RETURN_UINT);
8522 emit_dst_register(emit, &inst->Dst[0]);
8523 emit_src_register(emit, &inst->Src[0]);
8524 emit_resource_register(emit, unit);
8525 end_emit_instruction(emit);
8526 }
8527
8528 free_temp_indexes(emit);
8529
8530 return TRUE;
8531 }
8532
8533
8534 /**
8535 * Does this opcode produce a double-precision result?
8536 * XXX perhaps move this to a TGSI utility.
8537 */
8538 static bool
8539 opcode_has_dbl_dst(unsigned opcode)
8540 {
8541 switch (opcode) {
8542 case TGSI_OPCODE_F2D:
8543 case TGSI_OPCODE_DABS:
8544 case TGSI_OPCODE_DADD:
8545 case TGSI_OPCODE_DFRAC:
8546 case TGSI_OPCODE_DMAX:
8547 case TGSI_OPCODE_DMIN:
8548 case TGSI_OPCODE_DMUL:
8549 case TGSI_OPCODE_DNEG:
8550 case TGSI_OPCODE_I2D:
8551 case TGSI_OPCODE_U2D:
8552 // XXX more TBD
8553 return true;
8554 default:
8555 return false;
8556 }
8557 }
8558
8559
8560 /**
8561 * Does this opcode use double-precision source registers?
8562 */
8563 static bool
8564 opcode_has_dbl_src(unsigned opcode)
8565 {
8566 switch (opcode) {
8567 case TGSI_OPCODE_D2F:
8568 case TGSI_OPCODE_DABS:
8569 case TGSI_OPCODE_DADD:
8570 case TGSI_OPCODE_DFRAC:
8571 case TGSI_OPCODE_DMAX:
8572 case TGSI_OPCODE_DMIN:
8573 case TGSI_OPCODE_DMUL:
8574 case TGSI_OPCODE_DNEG:
8575 case TGSI_OPCODE_D2I:
8576 case TGSI_OPCODE_D2U:
8577 // XXX more TBD
8578 return true;
8579 default:
8580 return false;
8581 }
8582 }
8583
8584
8585 /**
8586 * Check that the swizzle for reading from a double-precision register
8587 * is valid.
8588 */
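/*
 * A double operand occupies two 32-bit channels, so each swizzle pair must
 * select either .xy or .zw. For example, swizzles like .xyzw, .xyxy, .zwxy
 * and .zwzw satisfy the asserts below, while something like .yzwx does not.
 */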
8589 static void
8590 check_double_src_swizzle(const struct tgsi_full_src_register *reg)
8591 {
8592 assert((reg->Register.SwizzleX == PIPE_SWIZZLE_X &&
8593 reg->Register.SwizzleY == PIPE_SWIZZLE_Y) ||
8594 (reg->Register.SwizzleX == PIPE_SWIZZLE_Z &&
8595 reg->Register.SwizzleY == PIPE_SWIZZLE_W));
8596
8597 assert((reg->Register.SwizzleZ == PIPE_SWIZZLE_X &&
8598 reg->Register.SwizzleW == PIPE_SWIZZLE_Y) ||
8599 (reg->Register.SwizzleZ == PIPE_SWIZZLE_Z &&
8600 reg->Register.SwizzleW == PIPE_SWIZZLE_W));
8601 }
8602
8603
8604 /**
8605 * Check that the writemask for a double-precision instruction is valid.
8606 */
8607 static void
8608 check_double_dst_writemask(const struct tgsi_full_instruction *inst)
8609 {
8610 ASSERTED unsigned writemask = inst->Dst[0].Register.WriteMask;
8611
8612 switch (inst->Instruction.Opcode) {
8613 case TGSI_OPCODE_DABS:
8614 case TGSI_OPCODE_DADD:
8615 case TGSI_OPCODE_DFRAC:
8616 case TGSI_OPCODE_DNEG:
8617 case TGSI_OPCODE_DMAD:
8618 case TGSI_OPCODE_DMAX:
8619 case TGSI_OPCODE_DMIN:
8620 case TGSI_OPCODE_DMUL:
8621 case TGSI_OPCODE_DRCP:
8622 case TGSI_OPCODE_DSQRT:
8623 case TGSI_OPCODE_F2D:
8624 assert(writemask == TGSI_WRITEMASK_XYZW ||
8625 writemask == TGSI_WRITEMASK_XY ||
8626 writemask == TGSI_WRITEMASK_ZW);
8627 break;
8628 case TGSI_OPCODE_DSEQ:
8629 case TGSI_OPCODE_DSGE:
8630 case TGSI_OPCODE_DSNE:
8631 case TGSI_OPCODE_DSLT:
8632 case TGSI_OPCODE_D2I:
8633 case TGSI_OPCODE_D2U:
8634 /* Write to 1 or 2 components only */
8635 assert(util_bitcount(writemask) <= 2);
8636 break;
8637 default:
8638 /* XXX this list may be incomplete */
8639 ;
8640 }
8641 }
8642
8643
8644 /**
8645 * Double-precision absolute value.
8646 */
8647 static boolean
8648 emit_dabs(struct svga_shader_emitter_v10 *emit,
8649 const struct tgsi_full_instruction *inst)
8650 {
8651 assert(emit->version >= 50);
8652 check_double_src_swizzle(&inst->Src[0]);
8653 check_double_dst_writemask(inst);
8654
8655 struct tgsi_full_src_register abs_src = absolute_src(&inst->Src[0]);
8656
8657 /* DMOV dst, |src| */
8658 emit_instruction_op1(emit, VGPU10_OPCODE_DMOV, &inst->Dst[0], &abs_src);
8659
8660 return TRUE;
8661 }
8662
8663
8664 /**
8665 * Double-precision negation
8666 */
8667 static boolean
8668 emit_dneg(struct svga_shader_emitter_v10 *emit,
8669 const struct tgsi_full_instruction *inst)
8670 {
8671 assert(emit->version >= 50);
8672 check_double_src_swizzle(&inst->Src[0]);
8673 check_double_dst_writemask(inst);
8674
8675 struct tgsi_full_src_register neg_src = negate_src(&inst->Src[0]);
8676
8677 /* DMOV dst, -src */
8678 emit_instruction_op1(emit, VGPU10_OPCODE_DMOV, &inst->Dst[0], &neg_src);
8679
8680 return TRUE;
8681 }
8682
8683
8684 /**
8685 * SM5 has no DMAD opcode. Implement the multiply-add with DMUL/DADD.
8686 */
8687 static boolean
8688 emit_dmad(struct svga_shader_emitter_v10 *emit,
8689 const struct tgsi_full_instruction *inst)
8690 {
8691 assert(emit->version >= 50);
8692 check_double_src_swizzle(&inst->Src[0]);
8693 check_double_src_swizzle(&inst->Src[1]);
8694 check_double_src_swizzle(&inst->Src[2]);
8695 check_double_dst_writemask(inst);
8696
8697 unsigned tmp = get_temp_index(emit);
8698 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
8699 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
8700
8701 /* DMUL tmp, src[0], src[1] */
8702 emit_instruction_opn(emit, VGPU10_OPCODE_DMUL,
8703 &tmp_dst, &inst->Src[0], &inst->Src[1], NULL,
8704 FALSE, inst->Instruction.Precise);
8705
8706 /* DADD dst, tmp, src[2] */
8707 emit_instruction_opn(emit, VGPU10_OPCODE_DADD,
8708 &inst->Dst[0], &tmp_src, &inst->Src[2], NULL,
8709 inst->Instruction.Saturate, inst->Instruction.Precise);
8710 free_temp_indexes(emit);
8711
8712 return TRUE;
8713 }
8714
8715
8716 /**
8717 * Double precision reciprocal square root
8718 */
8719 static boolean
8720 emit_drsq(struct svga_shader_emitter_v10 *emit,
8721 const struct tgsi_full_dst_register *dst,
8722 const struct tgsi_full_src_register *src)
8723 {
8724 assert(emit->version >= 50);
8725
8726 VGPU10OpcodeToken0 token0;
8727 begin_emit_instruction(emit);
8728
8729 token0.value = 0;
8730 token0.opcodeType = VGPU10_OPCODE_VMWARE;
8731 token0.vmwareOpcodeType = VGPU10_VMWARE_OPCODE_DRSQ;
8732 emit_dword(emit, token0.value);
8733
8734 emit_dst_register(emit, dst);
8735
8736 check_double_src_swizzle(src);
8737 emit_src_register(emit, src);
8738
8739 end_emit_instruction(emit);
8740
8741 return TRUE;
8742 }
8743
8744
8745 /**
8746 * There is no SM5 opcode for double precision square root.
8747 * It will be implemented with DRSQ.
8748 * dst = src * DRSQ(src)
8749 */
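/*
 * Rough sketch of the emitted sequence (for illustration):
 *   DEQ   cond.xy, 0.0, src         (cond = src == 0)
 *   DMOVC tmp, cond.xyxy, 1.0, src  (tmp  = cond ? 1.0 : src)
 *   DRSQ  tmp, tmp
 *   DMUL  dst, tmp, src             (a zero src yields 0 * rsq(1) = 0)
 */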
8750 static boolean
8751 emit_dsqrt(struct svga_shader_emitter_v10 *emit,
8752 const struct tgsi_full_instruction *inst)
8753 {
8754 assert(emit->version >= 50);
8755
8756 check_double_src_swizzle(&inst->Src[0]);
8757
8758 /* temporary register to hold the source */
8759 unsigned tmp = get_temp_index(emit);
8760 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
8761 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
8762
8763 /* temporary register to hold the DEQ result */
8764 unsigned tmp_cond = get_temp_index(emit);
8765 struct tgsi_full_dst_register tmp_cond_dst = make_dst_temp_reg(tmp_cond);
8766 struct tgsi_full_dst_register tmp_cond_dst_xy =
8767 writemask_dst(&tmp_cond_dst, TGSI_WRITEMASK_X | TGSI_WRITEMASK_Y);
8768 struct tgsi_full_src_register tmp_cond_src = make_src_temp_reg(tmp_cond);
8769 struct tgsi_full_src_register tmp_cond_src_xy =
8770 swizzle_src(&tmp_cond_src,
8771 PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y,
8772 PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y);
8773
8774 /* The reciprocal square root of zero yields INF.
8775 * So if the source is 0, we replace it with 1 in the tmp register.
8776 * The subsequent multiplication by the original source (which is zero)
8777 * will then yield 0 in the result.
8778 */
8779
8780 /* tmp = (src == 0) ? 1.0 : src;
8781 * DEQ tmp_cond, 0, src
8782 * DMOVC tmp, tmp_cond, 1.0, src
8783 */
8784 struct tgsi_full_src_register zero =
8785 make_immediate_reg_double(emit, 0);
8786
8787 struct tgsi_full_src_register one =
8788 make_immediate_reg_double(emit, 1.0);
8789
8790 emit_instruction_op2(emit, VGPU10_OPCODE_DEQ, &tmp_cond_dst_xy,
8791 &zero, &inst->Src[0]);
8792 emit_instruction_op3(emit, VGPU10_OPCODE_DMOVC, &tmp_dst,
8793 &tmp_cond_src_xy, &one, &inst->Src[0]);
8794
8795 struct tgsi_full_dst_register tmp_rsq_dst = make_dst_temp_reg(tmp);
8796 struct tgsi_full_src_register tmp_rsq_src = make_src_temp_reg(tmp);
8797
8798 /* DRSQ tmp_rsq, tmp */
8799 emit_drsq(emit, &tmp_rsq_dst, &tmp_src);
8800
8801 /* DMUL dst, tmp_rsq, src[0] */
8802 emit_instruction_op2(emit, VGPU10_OPCODE_DMUL, &inst->Dst[0],
8803 &tmp_rsq_src, &inst->Src[0]);
8804
8805 free_temp_indexes(emit);
8806
8807 return TRUE;
8808 }
8809
8810
8811 static boolean
8812 emit_interp_offset(struct svga_shader_emitter_v10 *emit,
8813 const struct tgsi_full_instruction *inst)
8814 {
8815 assert(emit->version >= 50);
8816
8817 /* The src1.xy offset is a float with values in the range [-0.5, 0.5]
8818 * where (0,0) is the center of the pixel. We need to translate that
8819 * into an integer offset on a 16x16 grid in the range [-8/16, 7/16].
8820 * Also need to flip the Y axis (I think).
8821 */
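/* For example (illustrative): an offset of (0.25, -0.25) is multiplied by
 * {16, -16} to give (4.0, 4.0), which FTOI truncates to the integer grid
 * offset (4, 4) that EVAL_SNAPPED consumes.
 */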
8822 unsigned tmp = get_temp_index(emit);
8823 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
8824 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
8825 struct tgsi_full_dst_register tmp_dst_xy =
8826 writemask_dst(&tmp_dst, TGSI_WRITEMASK_X | TGSI_WRITEMASK_Y);
8827 struct tgsi_full_src_register const16 =
8828 make_immediate_reg_float4(emit, 16.0f, -16.0, 0, 0);
8829
8830 /* MUL tmp.xy, src1, {16, -16, 0, 0} */
8831 emit_instruction_op2(emit, VGPU10_OPCODE_MUL,
8832 &tmp_dst_xy, &inst->Src[1], &const16);
8833
8834 /* FTOI tmp.xy, tmp */
8835 emit_instruction_op1(emit, VGPU10_OPCODE_FTOI, &tmp_dst_xy, &tmp_src);
8836
8837 /* EVAL_SNAPPED dst, src0, tmp */
8838 emit_instruction_op2(emit, VGPU10_OPCODE_EVAL_SNAPPED,
8839 &inst->Dst[0], &inst->Src[0], &tmp_src);
8840
8841 free_temp_indexes(emit);
8842
8843 return TRUE;
8844 }
8845
8846
8847 /**
8848 * Emit a simple instruction (like ADD, MUL, MIN, etc).
8849 */
8850 static boolean
8851 emit_simple(struct svga_shader_emitter_v10 *emit,
8852 const struct tgsi_full_instruction *inst)
8853 {
8854 const enum tgsi_opcode opcode = inst->Instruction.Opcode;
8855 const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode);
8856 const bool dbl_dst = opcode_has_dbl_dst(inst->Instruction.Opcode);
8857 const bool dbl_src = opcode_has_dbl_src(inst->Instruction.Opcode);
8858 unsigned i;
8859
8860 if (inst->Instruction.Opcode == TGSI_OPCODE_BGNLOOP) {
8861 emit->current_loop_depth++;
8862 }
8863 else if (inst->Instruction.Opcode == TGSI_OPCODE_ENDLOOP) {
8864 emit->current_loop_depth--;
8865 }
8866
8867 begin_emit_instruction(emit);
8868 emit_opcode_precise(emit, translate_opcode(inst->Instruction.Opcode),
8869 inst->Instruction.Saturate,
8870 inst->Instruction.Precise);
8871 for (i = 0; i < op->num_dst; i++) {
8872 if (dbl_dst) {
8873 check_double_dst_writemask(inst);
8874 }
8875 emit_dst_register(emit, &inst->Dst[i]);
8876 }
8877 for (i = 0; i < op->num_src; i++) {
8878 if (dbl_src) {
8879 check_double_src_swizzle(&inst->Src[i]);
8880 }
8881 emit_src_register(emit, &inst->Src[i]);
8882 }
8883 end_emit_instruction(emit);
8884
8885 return TRUE;
8886 }
8887
8888
8889 /**
8890 * Emit MSB instruction (like IMSB, UMSB).
8891 *
8892 * GLSL counts the bit index starting from the LSB, whereas SM5's
8893 * firstbit_hi/shi instructions count it starting from the MSB.
8894 * To get the GLSL-style result from the SM5 device, we return
8895 * (31 - index) whenever the returned index is not -1.
8896 */
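/*
 * For example (illustrative): for an input of 0x10, SM5 firstbit_hi returns
 * 27 (the bit index counted from the MSB), while GLSL findMSB() expects 4;
 * 31 - 27 = 4 recovers the GLSL result.
 */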
8897 static boolean
8898 emit_msb(struct svga_shader_emitter_v10 *emit,
8899 const struct tgsi_full_instruction *inst)
8900 {
8901 const struct tgsi_full_dst_register *index_dst = &inst->Dst[0];
8902
8903 assert(index_dst->Register.File != TGSI_FILE_OUTPUT);
8904
8905 struct tgsi_full_src_register index_src =
8906 make_src_reg(index_dst->Register.File, index_dst->Register.Index);
8907 struct tgsi_full_src_register imm31 =
8908 make_immediate_reg_int(emit, 31);
8909 imm31 = scalar_src(&imm31, TGSI_SWIZZLE_X);
8910 struct tgsi_full_src_register neg_one =
8911 make_immediate_reg_int(emit, -1);
8912 neg_one = scalar_src(&neg_one, TGSI_SWIZZLE_X);
8913 unsigned tmp = get_temp_index(emit);
8914 const struct tgsi_full_dst_register tmp_dst =
8915 make_dst_temp_reg(tmp);
8916 const struct tgsi_full_dst_register tmp_dst_x =
8917 writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
8918 const struct tgsi_full_src_register tmp_src_x =
8919 make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp, TGSI_SWIZZLE_X);
8920 int writemask = TGSI_WRITEMASK_X;
8921 int src_swizzle = TGSI_SWIZZLE_X;
8922 int dst_writemask = index_dst->Register.WriteMask;
8923
8924 emit_simple(emit, inst);
8925
8926 /* index conversion from SM5 to GLSL */
8927 while (writemask & dst_writemask) {
8928 struct tgsi_full_src_register index_src_comp =
8929 scalar_src(&index_src, src_swizzle);
8930 struct tgsi_full_dst_register index_dst_comp =
8931 writemask_dst(index_dst, writemask);
8932
8933 /* check if index_src_comp != -1 */
8934 emit_instruction_op2(emit, VGPU10_OPCODE_INE,
8935 &tmp_dst_x, &index_src_comp, &neg_one);
8936
8937 /* if */
8938 emit_if(emit, &tmp_src_x);
8939
8940 index_src_comp = negate_src(&index_src_comp);
8941 /* dst = 31 - index:  IADD dst, imm{31}, -index */
8942 emit_instruction_op2(emit, VGPU10_OPCODE_IADD,
8943 &index_dst_comp, &imm31, &index_src_comp);
8944
8945 /* endif */
8946 emit_instruction_op0(emit, VGPU10_OPCODE_ENDIF);
8947
8948 writemask = writemask << 1;
8949 src_swizzle = src_swizzle + 1;
8950 }
8951 free_temp_indexes(emit);
8952 return TRUE;
8953 }
8954
8955
8956 /**
8957 * Emit a BFE instruction (like UBFE, IBFE).
8958 * tgsi representation:
8959 * U/IBFE dst, value, offset, width
8960 * SM5 representation:
8961 * U/IBFE dst, width, offset, value
8962 * Note: SM5 has width & offset range (0-31);
8963 * whereas GLSL has width & offset range (0-32)
8964 */
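/*
 * Rough sketch of the emitted code (for illustration):
 *   IEQ  cond1.x, width, 32
 *   IEQ  cond2.x, offset, 0
 *   AND  cond2.x, cond2.x, cond1.x
 *   IF cond2.x
 *     MOV dst, value              (GLSL: a full 32-bit extract returns value)
 *   ELSE
 *     U/IBFE dst, width, offset, value
 *   ENDIF
 */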
8965 static boolean
8966 emit_bfe(struct svga_shader_emitter_v10 *emit,
8967 const struct tgsi_full_instruction *inst)
8968 {
8969 const enum tgsi_opcode opcode = inst->Instruction.Opcode;
8970 struct tgsi_full_src_register imm32 = make_immediate_reg_int(emit, 32);
8971 imm32 = scalar_src(&imm32, TGSI_SWIZZLE_X);
8972 struct tgsi_full_src_register zero = make_immediate_reg_int(emit, 0);
8973 zero = scalar_src(&zero, TGSI_SWIZZLE_X);
8974
8975 unsigned tmp1 = get_temp_index(emit);
8976 const struct tgsi_full_dst_register cond1_dst = make_dst_temp_reg(tmp1);
8977 const struct tgsi_full_dst_register cond1_dst_x =
8978 writemask_dst(&cond1_dst, TGSI_WRITEMASK_X);
8979 const struct tgsi_full_src_register cond1_src_x =
8980 make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp1, TGSI_SWIZZLE_X);
8981
8982 unsigned tmp2 = get_temp_index(emit);
8983 const struct tgsi_full_dst_register cond2_dst = make_dst_temp_reg(tmp2);
8984 const struct tgsi_full_dst_register cond2_dst_x =
8985 writemask_dst(&cond2_dst, TGSI_WRITEMASK_X);
8986 const struct tgsi_full_src_register cond2_src_x =
8987 make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp2, TGSI_SWIZZLE_X);
8988
8989 /**
8990 * In SM5, when width = 32 and offset = 0, it returns 0.
8991 * GLSL, on the other hand, expects the value to be copied as-is to dst.
8992 */
8993
8994 /* cond1 = width == 32 */
8995 emit_instruction_op2(emit, VGPU10_OPCODE_IEQ,
8996 &cond1_dst_x, &inst->Src[2], &imm32);
8997
8998 /* cond2 = offset == 0 */
8999 emit_instruction_op2(emit, VGPU10_OPCODE_IEQ,
9000 &cond2_dst_x, &inst->Src[1], &zero);
9001
9002 /* cond2 = cond1 & cond2 */
9003 emit_instruction_op2(emit, VGPU10_OPCODE_AND, &cond2_dst_x,
9004 &cond2_src_x,
9005 &cond1_src_x);
9006 /* IF */
9007 emit_if(emit, &cond2_src_x);
9008
9009 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
9010 &inst->Src[0]);
9011
9012 /* ELSE */
9013 emit_instruction_op0(emit, VGPU10_OPCODE_ELSE);
9014
9015 /* U/IBFE dst, width, offset, value */
9016 emit_instruction_op3(emit, translate_opcode(opcode), &inst->Dst[0],
9017 &inst->Src[2], &inst->Src[1], &inst->Src[0]);
9018
9019 /* ENDIF */
9020 emit_instruction_op0(emit, VGPU10_OPCODE_ENDIF);
9021
9022 free_temp_indexes(emit);
9023 return TRUE;
9024 }
9025
9026
9027 /**
9028 * Emit BFI instruction
9029 * tgsi representation:
9030 * BFI dst, base, insert, offset, width
9031 * SM5 representation:
9032 * BFI dst, width, offset, insert, base
9033 * Note: SM5 has width & offset range (0-31);
9034 * whereas GLSL has width & offset range (0-32)
9035 */
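/*
 * Rough sketch of the emitted code (for illustration):
 *   IEQ  cond1.x, width, 32
 *   IEQ  cond2.x, offset, 0
 *   AND  cond2.x, cond2.x, cond1.x
 *   IF cond2.x
 *     MOV dst, insert             (GLSL: a full 32-bit insert returns insert)
 *   ELSE
 *     BFI dst, width, offset, insert, base
 *   ENDIF
 */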
9036 static boolean
9037 emit_bfi(struct svga_shader_emitter_v10 *emit,
9038 const struct tgsi_full_instruction *inst)
9039 {
9040 const enum tgsi_opcode opcode = inst->Instruction.Opcode;
9041 struct tgsi_full_src_register imm32 = make_immediate_reg_int(emit, 32);
9042 imm32 = scalar_src(&imm32, TGSI_SWIZZLE_X);
9043
9044 struct tgsi_full_src_register zero = make_immediate_reg_int(emit, 0);
9045 zero = scalar_src(&zero, TGSI_SWIZZLE_X);
9046
9047 unsigned tmp1 = get_temp_index(emit);
9048 const struct tgsi_full_dst_register cond1_dst = make_dst_temp_reg(tmp1);
9049 const struct tgsi_full_dst_register cond1_dst_x =
9050 writemask_dst(&cond1_dst, TGSI_WRITEMASK_X);
9051 const struct tgsi_full_src_register cond1_src_x =
9052 make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp1, TGSI_SWIZZLE_X);
9053
9054 unsigned tmp2 = get_temp_index(emit);
9055 const struct tgsi_full_dst_register cond2_dst = make_dst_temp_reg(tmp2);
9056 const struct tgsi_full_dst_register cond2_dst_x =
9057 writemask_dst(&cond2_dst, TGSI_WRITEMASK_X);
9058 const struct tgsi_full_src_register cond2_src_x =
9059 make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp2, TGSI_SWIZZLE_X);
9060
9061 /**
9062 * In SM5, when width = 32 and offset = 0, it returns 0.
9063 * GLSL, on the other hand, expects the insert value to be copied as-is to dst.
9064 */
9065
9066 /* cond1 = width == 32 */
9067 emit_instruction_op2(emit, VGPU10_OPCODE_IEQ,
9068 &cond1_dst_x, &inst->Src[3], &imm32);
9069
9070 /* cond2 = offset == 0 */
9071 emit_instruction_op2(emit, VGPU10_OPCODE_IEQ,
9072 &cond2_dst_x, &inst->Src[2], &zero);
9073
9074 /* cond2 = cond1 & cond2 */
9075 emit_instruction_op2(emit, VGPU10_OPCODE_AND,
9076 &cond2_dst_x, &cond2_src_x, &cond1_src_x);
9077
9078 /* if */
9079 emit_if(emit, &cond2_src_x);
9080
9081 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
9082 &inst->Src[1]);
9083
9084 /* else */
9085 emit_instruction_op0(emit, VGPU10_OPCODE_ELSE);
9086
9087 /* BFI dst, width, offset, insert, base */
9088 begin_emit_instruction(emit);
9089 emit_opcode(emit, translate_opcode(opcode), inst->Instruction.Saturate);
9090 emit_dst_register(emit, &inst->Dst[0]);
9091 emit_src_register(emit, &inst->Src[3]);
9092 emit_src_register(emit, &inst->Src[2]);
9093 emit_src_register(emit, &inst->Src[1]);
9094 emit_src_register(emit, &inst->Src[0]);
9095 end_emit_instruction(emit);
9096
9097 /* endif */
9098 emit_instruction_op0(emit, VGPU10_OPCODE_ENDIF);
9099
9100 free_temp_indexes(emit);
9101 return TRUE;
9102 }
9103
9104
9105 /**
9106 * We only special case the MOV instruction to try to detect constant
9107 * color writes in the fragment shader.
9108 */
9109 static boolean
9110 emit_mov(struct svga_shader_emitter_v10 *emit,
9111 const struct tgsi_full_instruction *inst)
9112 {
9113 const struct tgsi_full_src_register *src = &inst->Src[0];
9114 const struct tgsi_full_dst_register *dst = &inst->Dst[0];
9115
9116 if (emit->unit == PIPE_SHADER_FRAGMENT &&
9117 dst->Register.File == TGSI_FILE_OUTPUT &&
9118 dst->Register.Index == 0 &&
9119 src->Register.File == TGSI_FILE_CONSTANT &&
9120 !src->Register.Indirect) {
9121 emit->constant_color_output = TRUE;
9122 }
9123
9124 return emit_simple(emit, inst);
9125 }
9126
9127
9128 /**
9129 * Emit a simple VGPU10 instruction which writes to multiple dest registers,
9130 * where TGSI only uses one dest register.
9131 */
9132 static boolean
9133 emit_simple_1dst(struct svga_shader_emitter_v10 *emit,
9134 const struct tgsi_full_instruction *inst,
9135 unsigned dst_count,
9136 unsigned dst_index)
9137 {
9138 const enum tgsi_opcode opcode = inst->Instruction.Opcode;
9139 const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode);
9140 unsigned i;
9141
9142 begin_emit_instruction(emit);
9143 emit_opcode(emit, translate_opcode(opcode), inst->Instruction.Saturate);
9144
9145 for (i = 0; i < dst_count; i++) {
9146 if (i == dst_index) {
9147 emit_dst_register(emit, &inst->Dst[0]);
9148 } else {
9149 emit_null_dst_register(emit);
9150 }
9151 }
9152
9153 for (i = 0; i < op->num_src; i++) {
9154 emit_src_register(emit, &inst->Src[i]);
9155 }
9156 end_emit_instruction(emit);
9157
9158 return TRUE;
9159 }
9160
9161
9162 /**
9163 * Emit a vmware specific VGPU10 instruction.
9164 */
9165 static boolean
9166 emit_vmware(struct svga_shader_emitter_v10 *emit,
9167 const struct tgsi_full_instruction *inst,
9168 VGPU10_VMWARE_OPCODE_TYPE subopcode)
9169 {
9170 VGPU10OpcodeToken0 token0;
9171 const enum tgsi_opcode opcode = inst->Instruction.Opcode;
9172 const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode);
9173 const bool dbl_dst = opcode_has_dbl_dst(inst->Instruction.Opcode);
9174 const bool dbl_src = opcode_has_dbl_src(inst->Instruction.Opcode);
9175
9176 unsigned i;
9177
9178 begin_emit_instruction(emit);
9179
9180 assert((subopcode > 0 && emit->version >= 50) || subopcode == 0);
9181
9182 token0.value = 0;
9183 token0.opcodeType = VGPU10_OPCODE_VMWARE;
9184 token0.vmwareOpcodeType = subopcode;
9185 emit_dword(emit, token0.value);
9186
9187 if (subopcode == VGPU10_VMWARE_OPCODE_IDIV) {
9188 /* IDIV only uses the first dest register. */
9189 emit_dst_register(emit, &inst->Dst[0]);
9190 emit_null_dst_register(emit);
9191 } else {
9192 for (i = 0; i < op->num_dst; i++) {
9193 if (dbl_dst) {
9194 check_double_dst_writemask(inst);
9195 }
9196 emit_dst_register(emit, &inst->Dst[i]);
9197 }
9198 }
9199
9200 for (i = 0; i < op->num_src; i++) {
9201 if (dbl_src) {
9202 check_double_src_swizzle(&inst->Src[i]);
9203 }
9204 emit_src_register(emit, &inst->Src[i]);
9205 }
9206 end_emit_instruction(emit);
9207
9208 return TRUE;
9209 }
9210
9211
9212 /**
9213 * Translate a single TGSI instruction to VGPU10.
9214 */
9215 static boolean
9216 emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit,
9217 unsigned inst_number,
9218 const struct tgsi_full_instruction *inst)
9219 {
9220 const enum tgsi_opcode opcode = inst->Instruction.Opcode;
9221
9222 if (emit->skip_instruction)
9223 return TRUE;
9224
9225 switch (opcode) {
9226 case TGSI_OPCODE_ADD:
9227 case TGSI_OPCODE_AND:
9228 case TGSI_OPCODE_BGNLOOP:
9229 case TGSI_OPCODE_BRK:
9230 case TGSI_OPCODE_CEIL:
9231 case TGSI_OPCODE_CONT:
9232 case TGSI_OPCODE_DDX:
9233 case TGSI_OPCODE_DDY:
9234 case TGSI_OPCODE_DIV:
9235 case TGSI_OPCODE_DP2:
9236 case TGSI_OPCODE_DP3:
9237 case TGSI_OPCODE_DP4:
9238 case TGSI_OPCODE_ELSE:
9239 case TGSI_OPCODE_ENDIF:
9240 case TGSI_OPCODE_ENDLOOP:
9241 case TGSI_OPCODE_ENDSUB:
9242 case TGSI_OPCODE_F2I:
9243 case TGSI_OPCODE_F2U:
9244 case TGSI_OPCODE_FLR:
9245 case TGSI_OPCODE_FRC:
9246 case TGSI_OPCODE_FSEQ:
9247 case TGSI_OPCODE_FSGE:
9248 case TGSI_OPCODE_FSLT:
9249 case TGSI_OPCODE_FSNE:
9250 case TGSI_OPCODE_I2F:
9251 case TGSI_OPCODE_IMAX:
9252 case TGSI_OPCODE_IMIN:
9253 case TGSI_OPCODE_INEG:
9254 case TGSI_OPCODE_ISGE:
9255 case TGSI_OPCODE_ISHR:
9256 case TGSI_OPCODE_ISLT:
9257 case TGSI_OPCODE_MAD:
9258 case TGSI_OPCODE_MAX:
9259 case TGSI_OPCODE_MIN:
9260 case TGSI_OPCODE_MUL:
9261 case TGSI_OPCODE_NOP:
9262 case TGSI_OPCODE_NOT:
9263 case TGSI_OPCODE_OR:
9264 case TGSI_OPCODE_UADD:
9265 case TGSI_OPCODE_USEQ:
9266 case TGSI_OPCODE_USGE:
9267 case TGSI_OPCODE_USLT:
9268 case TGSI_OPCODE_UMIN:
9269 case TGSI_OPCODE_UMAD:
9270 case TGSI_OPCODE_UMAX:
9271 case TGSI_OPCODE_ROUND:
9272 case TGSI_OPCODE_SQRT:
9273 case TGSI_OPCODE_SHL:
9274 case TGSI_OPCODE_TRUNC:
9275 case TGSI_OPCODE_U2F:
9276 case TGSI_OPCODE_UCMP:
9277 case TGSI_OPCODE_USHR:
9278 case TGSI_OPCODE_USNE:
9279 case TGSI_OPCODE_XOR:
9280 /* Begin SM5 opcodes */
9281 case TGSI_OPCODE_F2D:
9282 case TGSI_OPCODE_D2F:
9283 case TGSI_OPCODE_DADD:
9284 case TGSI_OPCODE_DMUL:
9285 case TGSI_OPCODE_DMAX:
9286 case TGSI_OPCODE_DMIN:
9287 case TGSI_OPCODE_DSGE:
9288 case TGSI_OPCODE_DSLT:
9289 case TGSI_OPCODE_DSEQ:
9290 case TGSI_OPCODE_DSNE:
9291 case TGSI_OPCODE_BREV:
9292 case TGSI_OPCODE_POPC:
9293 case TGSI_OPCODE_LSB:
9294 case TGSI_OPCODE_INTERP_CENTROID:
9295 case TGSI_OPCODE_INTERP_SAMPLE:
9296 /* simple instructions */
9297 return emit_simple(emit, inst);
9298 case TGSI_OPCODE_RET:
9299 if (emit->unit == PIPE_SHADER_TESS_CTRL &&
9300 !emit->tcs.control_point_phase) {
9301
9302 /* store the tessellation levels in the patch constant phase only */
9303 store_tesslevels(emit);
9304 }
9305 return emit_simple(emit, inst);
9306
9307 case TGSI_OPCODE_IMSB:
9308 case TGSI_OPCODE_UMSB:
9309 return emit_msb(emit, inst);
9310 case TGSI_OPCODE_IBFE:
9311 case TGSI_OPCODE_UBFE:
9312 return emit_bfe(emit, inst);
9313 case TGSI_OPCODE_BFI:
9314 return emit_bfi(emit, inst);
9315 case TGSI_OPCODE_MOV:
9316 return emit_mov(emit, inst);
9317 case TGSI_OPCODE_EMIT:
9318 return emit_vertex(emit, inst);
9319 case TGSI_OPCODE_ENDPRIM:
9320 return emit_endprim(emit, inst);
9321 case TGSI_OPCODE_IABS:
9322 return emit_iabs(emit, inst);
9323 case TGSI_OPCODE_ARL:
9324 /* fall-through */
9325 case TGSI_OPCODE_UARL:
9326 return emit_arl_uarl(emit, inst);
9327 case TGSI_OPCODE_BGNSUB:
9328 /* no-op */
9329 return TRUE;
9330 case TGSI_OPCODE_CAL:
9331 return emit_cal(emit, inst);
9332 case TGSI_OPCODE_CMP:
9333 return emit_cmp(emit, inst);
9334 case TGSI_OPCODE_COS:
9335 return emit_sincos(emit, inst);
9336 case TGSI_OPCODE_DST:
9337 return emit_dst(emit, inst);
9338 case TGSI_OPCODE_EX2:
9339 return emit_ex2(emit, inst);
9340 case TGSI_OPCODE_EXP:
9341 return emit_exp(emit, inst);
9342 case TGSI_OPCODE_IF:
9343 return emit_if(emit, &inst->Src[0]);
9344 case TGSI_OPCODE_KILL:
9345 return emit_kill(emit, inst);
9346 case TGSI_OPCODE_KILL_IF:
9347 return emit_kill_if(emit, inst);
9348 case TGSI_OPCODE_LG2:
9349 return emit_lg2(emit, inst);
9350 case TGSI_OPCODE_LIT:
9351 return emit_lit(emit, inst);
9352 case TGSI_OPCODE_LODQ:
9353 return emit_lodq(emit, inst);
9354 case TGSI_OPCODE_LOG:
9355 return emit_log(emit, inst);
9356 case TGSI_OPCODE_LRP:
9357 return emit_lrp(emit, inst);
9358 case TGSI_OPCODE_POW:
9359 return emit_pow(emit, inst);
9360 case TGSI_OPCODE_RCP:
9361 return emit_rcp(emit, inst);
9362 case TGSI_OPCODE_RSQ:
9363 return emit_rsq(emit, inst);
9364 case TGSI_OPCODE_SAMPLE:
9365 return emit_sample(emit, inst);
9366 case TGSI_OPCODE_SEQ:
9367 return emit_seq(emit, inst);
9368 case TGSI_OPCODE_SGE:
9369 return emit_sge(emit, inst);
9370 case TGSI_OPCODE_SGT:
9371 return emit_sgt(emit, inst);
9372 case TGSI_OPCODE_SIN:
9373 return emit_sincos(emit, inst);
9374 case TGSI_OPCODE_SLE:
9375 return emit_sle(emit, inst);
9376 case TGSI_OPCODE_SLT:
9377 return emit_slt(emit, inst);
9378 case TGSI_OPCODE_SNE:
9379 return emit_sne(emit, inst);
9380 case TGSI_OPCODE_SSG:
9381 return emit_ssg(emit, inst);
9382 case TGSI_OPCODE_ISSG:
9383 return emit_issg(emit, inst);
9384 case TGSI_OPCODE_TEX:
9385 return emit_tex(emit, inst);
9386 case TGSI_OPCODE_TG4:
9387 return emit_tg4(emit, inst);
9388 case TGSI_OPCODE_TEX2:
9389 return emit_tex2(emit, inst);
9390 case TGSI_OPCODE_TXP:
9391 return emit_txp(emit, inst);
9392 case TGSI_OPCODE_TXB:
9393 case TGSI_OPCODE_TXB2:
9394 case TGSI_OPCODE_TXL:
9395 return emit_txl_txb(emit, inst);
9396 case TGSI_OPCODE_TXD:
9397 return emit_txd(emit, inst);
9398 case TGSI_OPCODE_TXF:
9399 return emit_txf(emit, inst);
9400 case TGSI_OPCODE_TXL2:
9401 return emit_txl2(emit, inst);
9402 case TGSI_OPCODE_TXQ:
9403 return emit_txq(emit, inst);
9404 case TGSI_OPCODE_UIF:
9405 return emit_if(emit, &inst->Src[0]);
9406 case TGSI_OPCODE_UMUL_HI:
9407 case TGSI_OPCODE_IMUL_HI:
9408 case TGSI_OPCODE_UDIV:
9409 /* These cases use only the FIRST of two destination registers */
9410 return emit_simple_1dst(emit, inst, 2, 0);
9411 case TGSI_OPCODE_IDIV:
9412 return emit_vmware(emit, inst, VGPU10_VMWARE_OPCODE_IDIV);
9413 case TGSI_OPCODE_UMUL:
9414 case TGSI_OPCODE_UMOD:
9415 case TGSI_OPCODE_MOD:
9416 /* These cases use only the SECOND of two destination registers */
9417 return emit_simple_1dst(emit, inst, 2, 1);
9418
9419 /* Begin SM5 opcodes */
9420 case TGSI_OPCODE_DABS:
9421 return emit_dabs(emit, inst);
9422 case TGSI_OPCODE_DNEG:
9423 return emit_dneg(emit, inst);
9424 case TGSI_OPCODE_DRCP:
9425 return emit_simple(emit, inst);
9426 case TGSI_OPCODE_DSQRT:
9427 return emit_dsqrt(emit, inst);
9428 case TGSI_OPCODE_DMAD:
9429 return emit_dmad(emit, inst);
9430 case TGSI_OPCODE_DFRAC:
9431 return emit_vmware(emit, inst, VGPU10_VMWARE_OPCODE_DFRC);
9432 case TGSI_OPCODE_D2I:
9433 case TGSI_OPCODE_D2U:
9434 return emit_simple(emit, inst);
9435 case TGSI_OPCODE_I2D:
9436 case TGSI_OPCODE_U2D:
9437 return emit_simple(emit, inst);
9438 case TGSI_OPCODE_DRSQ:
9439 return emit_drsq(emit, &inst->Dst[0], &inst->Src[0]);
9440 case TGSI_OPCODE_DDIV:
9441 return emit_simple(emit, inst);
9442 case TGSI_OPCODE_INTERP_OFFSET:
9443 return emit_interp_offset(emit, inst);
9444
9445 /* The following opcodes should never be seen here. We return zero
9446 * for all the PIPE_CAP_TGSI_DROUND_SUPPORTED, DFRACEXP_DLDEXP_SUPPORTED,
9447 * FMA_SUPPORTED, LDEXP_SUPPORTED queries.
9448 */
9449 case TGSI_OPCODE_FMA:
9450 case TGSI_OPCODE_LDEXP:
9451 case TGSI_OPCODE_DSSG:
9452 case TGSI_OPCODE_DFRACEXP:
9453 case TGSI_OPCODE_DLDEXP:
9454 case TGSI_OPCODE_DTRUNC:
9455 case TGSI_OPCODE_DCEIL:
9456 case TGSI_OPCODE_DFLR:
9457 debug_printf("Unexpected TGSI opcode %s. "
9458 "Should have been translated away by the GLSL compiler.\n",
9459 tgsi_get_opcode_name(opcode));
9460 return FALSE;
9461
9462 case TGSI_OPCODE_LOAD:
9463 case TGSI_OPCODE_STORE:
9464 case TGSI_OPCODE_ATOMAND:
9465 case TGSI_OPCODE_ATOMCAS:
9466 case TGSI_OPCODE_ATOMIMAX:
9467 case TGSI_OPCODE_ATOMIMIN:
9468 case TGSI_OPCODE_ATOMOR:
9469 case TGSI_OPCODE_ATOMUADD:
9470 case TGSI_OPCODE_ATOMUMAX:
9471 case TGSI_OPCODE_ATOMUMIN:
9472 case TGSI_OPCODE_ATOMXCHG:
9473 case TGSI_OPCODE_ATOMXOR:
9474 return FALSE;
9475 case TGSI_OPCODE_BARRIER:
9476 if (emit->unit == PIPE_SHADER_TESS_CTRL) {
9477 /* The SM5 device doesn't support BARRIER in the TCS. If a barrier is
9478 * used in the shader, do nothing for this opcode and continue with
9479 * the rest of the shader translation.
9480 */
9481 pipe_debug_message(&emit->svga_debug_callback, INFO,
9482 "barrier instruction is not supported in tessellation control shader\n");
9483 return TRUE;
9484 }
9485 else {
9486 return emit_simple(emit, inst);
9487 }
9488
9489 case TGSI_OPCODE_END:
9490 if (!emit_post_helpers(emit))
9491 return FALSE;
9492 return emit_simple(emit, inst);
9493
9494 default:
9495 debug_printf("Unimplemented tgsi instruction %s\n",
9496 tgsi_get_opcode_name(opcode));
9497 return FALSE;
9498 }
9499
9500 return TRUE;
9501 }
9502
9503
9504 /**
9505 * Emit the extra instructions to adjust the vertex position.
9506 * There are two possible adjustments:
9507 * 1. Converting from Gallium to VGPU10 coordinate space by applying the
9508 * "prescale" and "pretranslate" values.
9509 * 2. Undoing the viewport transformation when we use the swtnl/draw path.
9510 * The temporary register holding the vertex position is given by emit->vposition.tmp_index.
9511 */
9512 static void
9513 emit_vpos_instructions(struct svga_shader_emitter_v10 *emit)
9514 {
9515 struct tgsi_full_src_register tmp_pos_src;
9516 struct tgsi_full_dst_register pos_dst;
9517 const unsigned vs_pos_tmp_index = emit->vposition.tmp_index;
9518
9519 /* Don't bother to emit any extra vertex instructions if vertex position is
9520 * not written out
9521 */
9522 if (emit->vposition.out_index == INVALID_INDEX)
9523 return;
9524
9525 /**
9526 * Reset the temporary vertex position register index
9527 * so that emit_dst_register() will use the real vertex position output
9528 */
9529 emit->vposition.tmp_index = INVALID_INDEX;
9530
9531 tmp_pos_src = make_src_temp_reg(vs_pos_tmp_index);
9532 pos_dst = make_dst_output_reg(emit->vposition.out_index);
9533
9534 /* If the non-adjusted vertex position register index
9535 * is valid, copy the vertex position from the temporary
9536 * vertex position register before it is modified by the
9537 * prescale computation.
9538 */
9539 if (emit->vposition.so_index != INVALID_INDEX) {
9540 struct tgsi_full_dst_register pos_so_dst =
9541 make_dst_output_reg(emit->vposition.so_index);
9542
9543 /* MOV pos_so, tmp_pos */
9544 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &pos_so_dst, &tmp_pos_src);
9545 }
9546
9547 if (emit->vposition.need_prescale) {
9548 /* This code adjusts the vertex position to match the VGPU10 convention.
9549 * If p is the position computed by the shader (usually by applying the
9550 * modelview and projection matrices), the new position q is computed by:
9551 *
9552 * q.x = p.w * trans.x + p.x * scale.x
9553 * q.y = p.w * trans.y + p.y * scale.y
9554 * q.z = p.w * trans.z + p.z * scale.z;
9555 * q.w = p.w * trans.w + p.w;
9556 */
9557 struct tgsi_full_src_register tmp_pos_src_w =
9558 scalar_src(&tmp_pos_src, TGSI_SWIZZLE_W);
9559 struct tgsi_full_dst_register tmp_pos_dst =
9560 make_dst_temp_reg(vs_pos_tmp_index);
9561 struct tgsi_full_dst_register tmp_pos_dst_xyz =
9562 writemask_dst(&tmp_pos_dst, TGSI_WRITEMASK_XYZ);
9563
9564 struct tgsi_full_src_register prescale_scale =
9565 make_src_temp_reg(emit->vposition.prescale_scale_index);
9566 struct tgsi_full_src_register prescale_trans =
9567 make_src_temp_reg(emit->vposition.prescale_trans_index);
9568
9569 /* MUL tmp_pos.xyz, tmp_pos, prescale.scale */
9570 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_pos_dst_xyz,
9571 &tmp_pos_src, &prescale_scale);
9572
9573 /* MAD pos, tmp_pos.wwww, prescale.trans, tmp_pos */
9574 emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &pos_dst, &tmp_pos_src_w,
9575 &prescale_trans, &tmp_pos_src);
9576 }
9577 else if (emit->key.vs.undo_viewport) {
9578 /* This code computes the final vertex position from the temporary
9579 * vertex position by undoing the viewport transformation and the
9580 * divide-by-W operation (we convert window coords back to clip coords).
9581 * This is needed when we use the 'draw' module for fallbacks.
9582 * If p is the temp pos in window coords, then the NDC coord q is:
9583 * q.x = (p.x - vp.x_trans) / vp.x_scale * p.w
9584 * q.y = (p.y - vp.y_trans) / vp.y_scale * p.w
9585 * q.z = p.z * p.w
9586 * q.w = p.w
9587 * CONST[vs_viewport_index] contains:
9588 * { 1/vp.x_scale, 1/vp.y_scale, -vp.x_trans, -vp.y_trans }
9589 */
9590 struct tgsi_full_dst_register tmp_pos_dst =
9591 make_dst_temp_reg(vs_pos_tmp_index);
9592 struct tgsi_full_dst_register tmp_pos_dst_xy =
9593 writemask_dst(&tmp_pos_dst, TGSI_WRITEMASK_XY);
9594 struct tgsi_full_src_register tmp_pos_src_wwww =
9595 scalar_src(&tmp_pos_src, TGSI_SWIZZLE_W);
9596
9597 struct tgsi_full_dst_register pos_dst_xyz =
9598 writemask_dst(&pos_dst, TGSI_WRITEMASK_XYZ);
9599 struct tgsi_full_dst_register pos_dst_w =
9600 writemask_dst(&pos_dst, TGSI_WRITEMASK_W);
9601
9602 struct tgsi_full_src_register vp_xyzw =
9603 make_src_const_reg(emit->vs.viewport_index);
9604 struct tgsi_full_src_register vp_zwww =
9605 swizzle_src(&vp_xyzw, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W,
9606 TGSI_SWIZZLE_W, TGSI_SWIZZLE_W);
9607
9608 /* ADD tmp_pos.xy, tmp_pos.xy, viewport.zwww */
9609 emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp_pos_dst_xy,
9610 &tmp_pos_src, &vp_zwww);
9611
9612 /* MUL tmp_pos.xy, tmp_pos, viewport.xyzw */
9613 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_pos_dst_xy,
9614 &tmp_pos_src, &vp_xyzw);
9615
9616 /* MUL pos.xyz, tmp_pos.xyz, tmp_pos.www */
9617 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &pos_dst_xyz,
9618 &tmp_pos_src, &tmp_pos_src_wwww);
9619
9620 /* MOV pos.w, tmp_pos.w */
9621 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &pos_dst_w, &tmp_pos_src);
9622 }
9623 else if (vs_pos_tmp_index != INVALID_INDEX) {
9624 /* This code is to handle the case where the temporary vertex
9625 * position register is created when the vertex shader has stream
9626 * output and prescale is disabled because rasterization is to be
9627 * discarded.
9628 */
9629 struct tgsi_full_dst_register pos_dst =
9630 make_dst_output_reg(emit->vposition.out_index);
9631
9632 /* MOV pos, tmp_pos */
9633 begin_emit_instruction(emit);
9634 emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE);
9635 emit_dst_register(emit, &pos_dst);
9636 emit_src_register(emit, &tmp_pos_src);
9637 end_emit_instruction(emit);
9638 }
9639
9640 /* Restore original vposition.tmp_index value for the next GS vertex.
9641 * It doesn't matter for VS.
9642 */
9643 emit->vposition.tmp_index = vs_pos_tmp_index;
9644 }
9645
9646 static void
9647 emit_clipping_instructions(struct svga_shader_emitter_v10 *emit)
9648 {
9649 if (emit->clip_mode == CLIP_DISTANCE) {
9650 /* Copy from copy distance temporary to CLIPDIST & the shadow copy */
9651 emit_clip_distance_instructions(emit);
9652
9653 } else if (emit->clip_mode == CLIP_VERTEX &&
9654 emit->key.last_vertex_stage) {
9655 /* Convert TGSI CLIPVERTEX to CLIPDIST */
9656 emit_clip_vertex_instructions(emit);
9657 }
9658
9659 /**
9660 * Emit the vertex position and handle legacy user clip planes only if
9661 * there is a valid vertex position register index.
9662 * This handles the case where the shader doesn't output a vertex
9663 * position; in that case there is no need to emit any more
9664 * vertex instructions.
9665 */
9666 if (emit->vposition.out_index == INVALID_INDEX)
9667 return;
9668
9669 /**
9670 * Emit per-vertex clipping instructions for legacy user defined clip planes.
9671 * NOTE: we must emit the clip distance instructions before the
9672 * emit_vpos_instructions() call since the later function will change
9673 * the TEMP[vs_pos_tmp_index] value.
9674 */
9675 if (emit->clip_mode == CLIP_LEGACY && emit->key.last_vertex_stage) {
9676 /* Emit CLIPDIST for legacy user defined clip planes */
9677 emit_clip_distance_from_vpos(emit, emit->vposition.tmp_index);
9678 }
9679 }
9680
9681
9682 /**
9683 * Emit extra per-vertex instructions. This includes clip-coordinate
9684 * space conversion and computing clip distances. This is called for
9685 * each GS emit-vertex instruction and at the end of VS translation.
9686 */
9687 static void
9688 emit_vertex_instructions(struct svga_shader_emitter_v10 *emit)
9689 {
9690 /* Emit clipping instructions based on clipping mode */
9691 emit_clipping_instructions(emit);
9692
9693 /* Emit vertex position instructions */
9694 emit_vpos_instructions(emit);
9695 }
9696
9697
9698 /**
9699 * Translate the TGSI_OPCODE_EMIT GS instruction.
9700 */
9701 static boolean
9702 emit_vertex(struct svga_shader_emitter_v10 *emit,
9703 const struct tgsi_full_instruction *inst)
9704 {
9705 boolean ret = TRUE;
9706
9707 assert(emit->unit == PIPE_SHADER_GEOMETRY);
9708
9709 /**
9710 * Emit the viewport array index for the first vertex.
9711 */
9712 if (emit->gs.viewport_index_out_index != INVALID_INDEX) {
9713 struct tgsi_full_dst_register viewport_index_out =
9714 make_dst_output_reg(emit->gs.viewport_index_out_index);
9715 struct tgsi_full_dst_register viewport_index_out_x =
9716 writemask_dst(&viewport_index_out, TGSI_WRITEMASK_X);
9717 struct tgsi_full_src_register viewport_index_tmp =
9718 make_src_temp_reg(emit->gs.viewport_index_tmp_index);
9719
9720 /* Set the out index to INVALID_INDEX, so it will not
9721 * be assigned to a temp again in emit_dst_register, and
9722 * the viewport index will not be assigned again in the
9723 * subsequent vertices.
9724 */
9725 emit->gs.viewport_index_out_index = INVALID_INDEX;
9726 emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
9727 &viewport_index_out_x, &viewport_index_tmp);
9728 }
9729
9730 /**
9731 * Find the stream index associated with this emit vertex instruction.
9732 */
9733 assert(inst->Src[0].Register.File == TGSI_FILE_IMMEDIATE);
9734 unsigned streamIndex = find_stream_index(emit, &inst->Src[0]);
9735
9736 /**
9737 * According to the ARB_gpu_shader5 spec, the built-in geometry shader
9738 * outputs are always associated with vertex stream zero.
9739 * So emit the extra vertex instructions for position or clip distance
9740 * for stream zero only.
9741 */
9742 if (streamIndex == 0) {
9743 /**
9744 * Before emitting vertex instructions, emit the temporaries for
9745 * the prescale constants based on the viewport index if needed.
9746 */
9747 if (emit->vposition.need_prescale && !emit->vposition.have_prescale)
9748 emit_temp_prescale_instructions(emit);
9749
9750 emit_vertex_instructions(emit);
9751 }
9752
9753 begin_emit_instruction(emit);
9754 if (emit->version >= 50) {
9755 if (emit->info.num_stream_output_components[streamIndex] == 0) {
9756 /**
9757 * If there is no output for this stream, discard this instruction.
9758 */
9759 emit->discard_instruction = TRUE;
9760 }
9761 else {
9762 emit_opcode(emit, VGPU10_OPCODE_EMIT_STREAM, FALSE);
9763 emit_stream_register(emit, streamIndex);
9764 }
9765 }
9766 else {
9767 emit_opcode(emit, VGPU10_OPCODE_EMIT, FALSE);
9768 }
9769 end_emit_instruction(emit);
9770
9771 return ret;
9772 }
9773
9774
9775 /**
9776 * Emit the extra code to convert from VGPU10's boolean front-face
9777 * register to TGSI's signed front-face register.
9778 *
9779 * TODO: Make temporary front-face register a scalar.
9780 */
9781 static void
9782 emit_frontface_instructions(struct svga_shader_emitter_v10 *emit)
9783 {
9784 assert(emit->unit == PIPE_SHADER_FRAGMENT);
9785
9786 if (emit->fs.face_input_index != INVALID_INDEX) {
9787 /* convert vgpu10 boolean face register to gallium +/-1 value */
9788 struct tgsi_full_dst_register tmp_dst =
9789 make_dst_temp_reg(emit->fs.face_tmp_index);
9790 struct tgsi_full_src_register one =
9791 make_immediate_reg_float(emit, 1.0f);
9792 struct tgsi_full_src_register neg_one =
9793 make_immediate_reg_float(emit, -1.0f);
9794
9795 /* MOVC face_tmp, IS_FRONT_FACE.x, 1.0, -1.0 */
9796 begin_emit_instruction(emit);
9797 emit_opcode(emit, VGPU10_OPCODE_MOVC, FALSE);
9798 emit_dst_register(emit, &tmp_dst);
9799 emit_face_register(emit);
9800 emit_src_register(emit, &one);
9801 emit_src_register(emit, &neg_one);
9802 end_emit_instruction(emit);
9803 }
9804 }
9805
9806
9807 /**
9808 * Emit the extra code to convert from VGPU10's fragcoord.w value to 1/w.
9809 */
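/*
 * For example (illustrative): a fragment with window position (100.5, 20.5)
 * and clip-space w of 2.0 ends up with fragcoord_tmp = (100.5, 20.5, z, 0.5),
 * i.e. the .w component holds 1/w as TGSI expects.
 */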
9810 static void
9811 emit_fragcoord_instructions(struct svga_shader_emitter_v10 *emit)
9812 {
9813 assert(emit->unit == PIPE_SHADER_FRAGMENT);
9814
9815 if (emit->fs.fragcoord_input_index != INVALID_INDEX) {
9816 struct tgsi_full_dst_register tmp_dst =
9817 make_dst_temp_reg(emit->fs.fragcoord_tmp_index);
9818 struct tgsi_full_dst_register tmp_dst_xyz =
9819 writemask_dst(&tmp_dst, TGSI_WRITEMASK_XYZ);
9820 struct tgsi_full_dst_register tmp_dst_w =
9821 writemask_dst(&tmp_dst, TGSI_WRITEMASK_W);
9822 struct tgsi_full_src_register one =
9823 make_immediate_reg_float(emit, 1.0f);
9824 struct tgsi_full_src_register fragcoord =
9825 make_src_reg(TGSI_FILE_INPUT, emit->fs.fragcoord_input_index);
9826
9827 /* save the input index */
9828 unsigned fragcoord_input_index = emit->fs.fragcoord_input_index;
9829 /* set to invalid to prevent substitution in emit_src_register() */
9830 emit->fs.fragcoord_input_index = INVALID_INDEX;
9831
9832 /* MOV fragcoord_tmp.xyz, fragcoord.xyz */
9833 begin_emit_instruction(emit);
9834 emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE);
9835 emit_dst_register(emit, &tmp_dst_xyz);
9836 emit_src_register(emit, &fragcoord);
9837 end_emit_instruction(emit);
9838
9839 /* DIV fragcoord_tmp.w, 1.0, fragcoord.w */
9840 begin_emit_instruction(emit);
9841 emit_opcode(emit, VGPU10_OPCODE_DIV, FALSE);
9842 emit_dst_register(emit, &tmp_dst_w);
9843 emit_src_register(emit, &one);
9844 emit_src_register(emit, &fragcoord);
9845 end_emit_instruction(emit);
9846
9847 /* restore saved value */
9848 emit->fs.fragcoord_input_index = fragcoord_input_index;
9849 }
9850 }
9851
9852
9853 /**
9854 * Emit the extra code to get the current sample position value and
9855 * put it into a temp register.
9856 */
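/*
 * Note (illustrative): SAMPLE_POS returns offsets relative to the pixel
 * center in D3D's [-0.5, 0.5] range; adding 0.5 shifts them into GL's
 * [0, 1) gl_SamplePosition convention, e.g. (-0.25, 0.25) becomes
 * (0.25, 0.75).
 */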
9857 static void
9858 emit_sample_position_instructions(struct svga_shader_emitter_v10 *emit)
9859 {
9860 assert(emit->unit == PIPE_SHADER_FRAGMENT);
9861
9862 if (emit->fs.sample_pos_sys_index != INVALID_INDEX) {
9863 assert(emit->version >= 41);
9864
9865 struct tgsi_full_dst_register tmp_dst =
9866 make_dst_temp_reg(emit->fs.sample_pos_tmp_index);
9867 struct tgsi_full_src_register half =
9868 make_immediate_reg_float4(emit, 0.5, 0.5, 0.0, 0.0);
9869
9870 struct tgsi_full_src_register tmp_src =
9871 make_src_temp_reg(emit->fs.sample_pos_tmp_index);
9872 struct tgsi_full_src_register sample_index_reg =
9873 make_src_scalar_reg(TGSI_FILE_SYSTEM_VALUE,
9874 emit->fs.sample_id_sys_index, TGSI_SWIZZLE_X);
9875
9876 /* The first src register is a shader resource (if we want a
9877 * multisampled resource sample position) or the rasterizer register
9878 * (if we want the current sample position in the color buffer). We
9879 * want the latter.
9880 */
9881
9882 /* SAMPLE_POS dst, RASTERIZER, sampleIndex */
9883 begin_emit_instruction(emit);
9884 emit_opcode(emit, VGPU10_OPCODE_SAMPLE_POS, FALSE);
9885 emit_dst_register(emit, &tmp_dst);
9886 emit_rasterizer_register(emit);
9887 emit_src_register(emit, &sample_index_reg);
9888 end_emit_instruction(emit);
9889
9890 /* Convert from D3D coords to GL coords by adding 0.5 bias */
9891 /* ADD dst, dst, half */
9892 begin_emit_instruction(emit);
9893 emit_opcode(emit, VGPU10_OPCODE_ADD, FALSE);
9894 emit_dst_register(emit, &tmp_dst);
9895 emit_src_register(emit, &tmp_src);
9896 emit_src_register(emit, &half);
9897 end_emit_instruction(emit);
9898 }
9899 }
9900
9901
9902 /**
9903 * Emit extra instructions to adjust VS inputs/attributes. This can
9904 * mean casting a vertex attribute from int to float or setting the
9905 * W component to 1, or both.
9906 */
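/*
 * For example (illustrative), an attribute that needs both an int-to-float
 * cast and W forced to 1 expands to roughly:
 *   ITOF tmp, INPUT[index]
 *   MOV  tmp.w, 1.0
 */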
9907 static void
9908 emit_vertex_attrib_instructions(struct svga_shader_emitter_v10 *emit)
9909 {
9910 const unsigned save_w_1_mask = emit->key.vs.adjust_attrib_w_1;
9911 const unsigned save_itof_mask = emit->key.vs.adjust_attrib_itof;
9912 const unsigned save_utof_mask = emit->key.vs.adjust_attrib_utof;
9913 const unsigned save_is_bgra_mask = emit->key.vs.attrib_is_bgra;
9914 const unsigned save_puint_to_snorm_mask = emit->key.vs.attrib_puint_to_snorm;
9915 const unsigned save_puint_to_uscaled_mask = emit->key.vs.attrib_puint_to_uscaled;
9916 const unsigned save_puint_to_sscaled_mask = emit->key.vs.attrib_puint_to_sscaled;
9917
9918 unsigned adjust_mask = (save_w_1_mask |
9919 save_itof_mask |
9920 save_utof_mask |
9921 save_is_bgra_mask |
9922 save_puint_to_snorm_mask |
9923 save_puint_to_uscaled_mask |
9924 save_puint_to_sscaled_mask);
9925
9926 assert(emit->unit == PIPE_SHADER_VERTEX);
9927
9928 if (adjust_mask) {
9929 struct tgsi_full_src_register one =
9930 make_immediate_reg_float(emit, 1.0f);
9931
9932 struct tgsi_full_src_register one_int =
9933 make_immediate_reg_int(emit, 1);
9934
9935 /* We need to turn off these bitmasks while emitting the
9936 * instructions below, then restore them afterward.
9937 */
9938 emit->key.vs.adjust_attrib_w_1 = 0;
9939 emit->key.vs.adjust_attrib_itof = 0;
9940 emit->key.vs.adjust_attrib_utof = 0;
9941 emit->key.vs.attrib_is_bgra = 0;
9942 emit->key.vs.attrib_puint_to_snorm = 0;
9943 emit->key.vs.attrib_puint_to_uscaled = 0;
9944 emit->key.vs.attrib_puint_to_sscaled = 0;
9945
9946 while (adjust_mask) {
9947 unsigned index = u_bit_scan(&adjust_mask);
9948
9949 /* skip the instruction if this vertex attribute is not being used */
9950 if (emit->info.input_usage_mask[index] == 0)
9951 continue;
9952
9953 unsigned tmp = emit->vs.adjusted_input[index];
9954 struct tgsi_full_src_register input_src =
9955 make_src_reg(TGSI_FILE_INPUT, index);
9956
9957 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
9958 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
9959 struct tgsi_full_dst_register tmp_dst_w =
9960 writemask_dst(&tmp_dst, TGSI_WRITEMASK_W);
9961
9962 /* ITOF/UTOF/MOV tmp, input[index] */
9963 if (save_itof_mask & (1 << index)) {
9964 emit_instruction_op1(emit, VGPU10_OPCODE_ITOF,
9965 &tmp_dst, &input_src);
9966 }
9967 else if (save_utof_mask & (1 << index)) {
9968 emit_instruction_op1(emit, VGPU10_OPCODE_UTOF,
9969 &tmp_dst, &input_src);
9970 }
9971 else if (save_puint_to_snorm_mask & (1 << index)) {
9972 emit_puint_to_snorm(emit, &tmp_dst, &input_src);
9973 }
9974 else if (save_puint_to_uscaled_mask & (1 << index)) {
9975 emit_puint_to_uscaled(emit, &tmp_dst, &input_src);
9976 }
9977 else if (save_puint_to_sscaled_mask & (1 << index)) {
9978 emit_puint_to_sscaled(emit, &tmp_dst, &input_src);
9979 }
9980 else {
9981 assert((save_w_1_mask | save_is_bgra_mask) & (1 << index));
9982 emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
9983 &tmp_dst, &input_src);
9984 }
9985
9986 if (save_is_bgra_mask & (1 << index)) {
9987 emit_swap_r_b(emit, &tmp_dst, &tmp_src);
9988 }
9989
9990 if (save_w_1_mask & (1 << index)) {
9991 /* MOV tmp.w, 1.0 */
9992 if (emit->key.vs.attrib_is_pure_int & (1 << index)) {
9993 emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
9994 &tmp_dst_w, &one_int);
9995 }
9996 else {
9997 emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
9998 &tmp_dst_w, &one);
9999 }
10000 }
10001 }
10002
10003 emit->key.vs.adjust_attrib_w_1 = save_w_1_mask;
10004 emit->key.vs.adjust_attrib_itof = save_itof_mask;
10005 emit->key.vs.adjust_attrib_utof = save_utof_mask;
10006 emit->key.vs.attrib_is_bgra = save_is_bgra_mask;
10007 emit->key.vs.attrib_puint_to_snorm = save_puint_to_snorm_mask;
10008 emit->key.vs.attrib_puint_to_uscaled = save_puint_to_uscaled_mask;
10009 emit->key.vs.attrib_puint_to_sscaled = save_puint_to_sscaled_mask;
10010 }
10011 }
10012
10013
10014 /* Find the zero-value immediate for the default layer index */
10015 static void
10016 emit_default_layer_instructions(struct svga_shader_emitter_v10 *emit)
10017 {
10018 assert(emit->unit == PIPE_SHADER_FRAGMENT);
10019
10020 /* immediate for default layer index 0 */
10021 if (emit->fs.layer_input_index != INVALID_INDEX) {
10022 union tgsi_immediate_data imm;
10023 imm.Int = 0;
10024 emit->fs.layer_imm_index = find_immediate(emit, imm, 0);
10025 }
10026 }
10027
10028
10029 static void
10030 emit_temp_prescale_from_cbuf(struct svga_shader_emitter_v10 *emit,
10031 unsigned cbuf_index,
10032 struct tgsi_full_dst_register *scale,
10033 struct tgsi_full_dst_register *translate)
10034 {
10035 struct tgsi_full_src_register scale_cbuf = make_src_const_reg(cbuf_index);
10036 struct tgsi_full_src_register trans_cbuf = make_src_const_reg(cbuf_index+1);
10037
10038 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, scale, &scale_cbuf);
10039 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, translate, &trans_cbuf);
10040 }
10041
10042
10043 /**
10044 * A recursive helper function to find the prescale from the constant buffer
10045 */
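/*
 * For example, with num_prescale == 3 the recursion expands to roughly
 * (illustrative):
 *   GE tmp, vp_index, 0
 *   IF tmp
 *     <load prescale[0]>
 *   ELSE
 *     EQ tmp, vp_index, 1
 *     IF tmp
 *       <load prescale[1]>
 *     ELSE
 *       EQ tmp, vp_index, 2
 *       IF tmp
 *         <load prescale[2]>
 *       ENDIF
 *     ENDIF
 *   ENDIF
 */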
10046 static void
10047 find_prescale_from_cbuf(struct svga_shader_emitter_v10 *emit,
10048 unsigned index, unsigned num_prescale,
10049 struct tgsi_full_src_register *vp_index,
10050 struct tgsi_full_dst_register *scale,
10051 struct tgsi_full_dst_register *translate,
10052 struct tgsi_full_src_register *tmp_src,
10053 struct tgsi_full_dst_register *tmp_dst)
10054 {
10055 if (num_prescale == 0)
10056 return;
10057
10058 if (index > 0) {
10059 /* ELSE */
10060 emit_instruction_op0(emit, VGPU10_OPCODE_ELSE);
10061 }
10062
10063 struct tgsi_full_src_register index_src =
10064 make_immediate_reg_int(emit, index);
10065
10066 if (index == 0) {
10067 /* GE tmp, vp_index, index */
10068 emit_instruction_op2(emit, VGPU10_OPCODE_GE, tmp_dst,
10069 vp_index, &index_src);
10070 } else {
10071 /* EQ tmp, vp_index, index */
10072 emit_instruction_op2(emit, VGPU10_OPCODE_EQ, tmp_dst,
10073 vp_index, &index_src);
10074 }
10075
10076 /* IF tmp */
10077 emit_if(emit, tmp_src);
10078 emit_temp_prescale_from_cbuf(emit,
10079 emit->vposition.prescale_cbuf_index + 2 * index,
10080 scale, translate);
10081
10082 find_prescale_from_cbuf(emit, index+1, num_prescale-1,
10083 vp_index, scale, translate,
10084 tmp_src, tmp_dst);
10085
10086 /* ENDIF */
10087 emit_instruction_op0(emit, VGPU10_OPCODE_ENDIF);
10088 }
10089
10090
10091 /**
10092 * This helper function emits instructions to set the prescale
10093 * and translate temporaries to the correct constants from the
10094 * constant buffer according to the designated viewport.
10095 */
10096 static void
10097 emit_temp_prescale_instructions(struct svga_shader_emitter_v10 *emit)
10098 {
10099 struct tgsi_full_dst_register prescale_scale =
10100 make_dst_temp_reg(emit->vposition.prescale_scale_index);
10101 struct tgsi_full_dst_register prescale_translate =
10102 make_dst_temp_reg(emit->vposition.prescale_trans_index);
10103
10104 unsigned prescale_cbuf_index = emit->vposition.prescale_cbuf_index;
10105
10106 if (emit->vposition.num_prescale == 1) {
10107 emit_temp_prescale_from_cbuf(emit,
10108 prescale_cbuf_index,
10109 &prescale_scale, &prescale_translate);
10110 } else {
10111 /**
10112 * Since the SM5 device does not support dynamic indexing, we need
10113 * an if-else chain to find the prescale constants for the
10114 * specified viewport.
10115 */
10116 struct tgsi_full_src_register vp_index_src =
10117 make_src_temp_reg(emit->gs.viewport_index_tmp_index);
10118
10119 struct tgsi_full_src_register vp_index_src_x =
10120 scalar_src(&vp_index_src, TGSI_SWIZZLE_X);
10121
10122 unsigned tmp = get_temp_index(emit);
10123 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
10124 struct tgsi_full_src_register tmp_src_x =
10125 scalar_src(&tmp_src, TGSI_SWIZZLE_X);
10126 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
10127
10128 find_prescale_from_cbuf(emit, 0, emit->vposition.num_prescale,
10129 &vp_index_src_x,
10130 &prescale_scale, &prescale_translate,
10131 &tmp_src_x, &tmp_dst);
10132 }
10133
10134 /* Mark that the prescale temporaries have been emitted */
10135 emit->vposition.have_prescale = 1;
10136 }
10137
10138
10139 /**
10140 * A helper function to emit an instruction in a vertex shader to add a bias
10141 * to the VertexID system value. This patches the VertexID in the SVGA vertex
10142 * shader to include the base vertex of an indexed primitive or the start index
10143 * of a non-indexed primitive.
10144 */
10145 static void
10146 emit_vertex_id_nobase_instruction(struct svga_shader_emitter_v10 *emit)
10147 {
10148 struct tgsi_full_src_register vertex_id_bias_index =
10149 make_src_const_reg(emit->vs.vertex_id_bias_index);
10150 struct tgsi_full_src_register vertex_id_sys_src =
10151 make_src_reg(TGSI_FILE_SYSTEM_VALUE, emit->vs.vertex_id_sys_index);
10152 struct tgsi_full_src_register vertex_id_sys_src_x =
10153 scalar_src(&vertex_id_sys_src, TGSI_SWIZZLE_X);
10154 struct tgsi_full_dst_register vertex_id_tmp_dst =
10155 make_dst_temp_reg(emit->vs.vertex_id_tmp_index);
10156
10157 /* IADD vertex_id_tmp, vertex_id_sys, vertex_id_bias */
10158 unsigned vertex_id_tmp_index = emit->vs.vertex_id_tmp_index;
10159 emit->vs.vertex_id_tmp_index = INVALID_INDEX;
10160 emit_instruction_opn(emit, VGPU10_OPCODE_IADD, &vertex_id_tmp_dst,
10161 &vertex_id_sys_src_x, &vertex_id_bias_index, NULL, FALSE,
10162 FALSE);
10163 emit->vs.vertex_id_tmp_index = vertex_id_tmp_index;
10164 }
10165
10166 /**
10167 * A hull shader must have control point outputs, but a tessellation
10168 * control shader can return without writing to a control point output.
10169 * In that case, the control point output is assumed to be a passthrough
10170 * of the control point input.
10171 * This helper function writes out a control point output first, in case
10172 * the tessellation control shader returns before writing a
10173 * control point output.
10174 */
10175 static void
10176 emit_tcs_default_control_point_output(struct svga_shader_emitter_v10 *emit)
10177 {
10178 assert(emit->unit == PIPE_SHADER_TESS_CTRL);
10179 assert(emit->tcs.control_point_phase);
10180 assert(emit->tcs.control_point_input_index != INVALID_INDEX);
10181 assert(emit->tcs.control_point_out_index != INVALID_INDEX);
10182 assert(emit->tcs.invocation_id_sys_index != INVALID_INDEX);
10183
10184 /* UARL ADDR[INDEX].x INVOCATION.xxxx */
10185
10186 struct tgsi_full_src_register invocation_src;
10187 struct tgsi_full_dst_register addr_dst;
10188 struct tgsi_full_dst_register addr_dst_x;
10189 unsigned addr_tmp;
10190
10191 addr_tmp = emit->address_reg_index[emit->tcs.control_point_addr_index];
10192 addr_dst = make_dst_temp_reg(addr_tmp);
10193 addr_dst_x = writemask_dst(&addr_dst, TGSI_WRITEMASK_X);
10194
10195 invocation_src = make_src_reg(TGSI_FILE_SYSTEM_VALUE,
10196 emit->tcs.invocation_id_sys_index);
10197
10198 begin_emit_instruction(emit);
10199 emit_opcode_precise(emit, VGPU10_OPCODE_MOV, FALSE, FALSE);
10200 emit_dst_register(emit, &addr_dst_x);
10201 emit_src_register(emit, &invocation_src);
10202 end_emit_instruction(emit);
10203
10204
10205 /* MOV OUTPUT INPUT[ADDR[INDEX].x][POSITION] */
10206
10207 struct tgsi_full_src_register input_control_point;
10208 struct tgsi_full_dst_register output_control_point;
10209
10210 input_control_point = make_src_reg(TGSI_FILE_INPUT,
10211 emit->tcs.control_point_input_index);
10212 input_control_point.Register.Dimension = 1;
10213 input_control_point.Dimension.Indirect = 1;
10214 input_control_point.DimIndirect.File = TGSI_FILE_ADDRESS;
10215 input_control_point.DimIndirect.Index = emit->tcs.control_point_addr_index;
10216 output_control_point =
10217 make_dst_output_reg(emit->tcs.control_point_out_index);
10218
10219 begin_emit_instruction(emit);
10220 emit_opcode_precise(emit, VGPU10_OPCODE_MOV, FALSE, FALSE);
10221 emit_dst_register(emit, &output_control_point);
10222 emit_src_register(emit, &input_control_point);
10223 end_emit_instruction(emit);
10224 }
10225
10226 /**
10227  * This function constructs temporary tessfactors from the VGPU10*_TESSFACTOR
10228  * values in the domain shader. SM5 exposes the tessfactors as scalar floating
10229  * point values, whereas TGSI treats them as vectors. This function builds a
10230  * temporary tessfactor vector, matching TGSI_SEMANTIC_TESSINNER/OUTER, filled
10231  * with the VGPU10*_TESSFACTOR values. This constructed vector is used wherever
10232  * TGSI_SEMANTIC_TESSINNER/OUTER appears in the shader.
10233 */
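/*
 * Summary of the mapping implemented below, where inner_in[i] / outer_in[i]
 * denote INPUT[tes.inner.in_index + i] / INPUT[tes.outer.in_index + i]:
 *
 *   quads:     outer.xyzw <- outer_in[0..3].x,  inner.xy <- inner_in[0..1].x
 *   triangles: outer.xyz  <- outer_in[0..2].x,  inner.x  <- inner_in[0].x
 *   isolines:  outer.xy   <- outer_in[0..1].x,  inner    <- 1.0 (see below)
 */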
10234 static void
10235 emit_temp_tessfactor_instructions(struct svga_shader_emitter_v10 *emit)
10236 {
10237 struct tgsi_full_src_register src;
10238 struct tgsi_full_dst_register dst;
10239
10240 if (emit->tes.inner.tgsi_index != INVALID_INDEX) {
10241 dst = make_dst_temp_reg(emit->tes.inner.temp_index);
10242
10243 switch (emit->tes.prim_mode) {
10244 case PIPE_PRIM_QUADS:
10245 src = make_src_scalar_reg(TGSI_FILE_INPUT,
10246 emit->tes.inner.in_index + 1, TGSI_SWIZZLE_X);
10247 dst = writemask_dst(&dst, TGSI_WRITEMASK_Y);
10248 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
10249 /* fallthrough */
10250 case PIPE_PRIM_TRIANGLES:
10251 src = make_src_scalar_reg(TGSI_FILE_INPUT,
10252 emit->tes.inner.in_index, TGSI_SWIZZLE_X);
10253 dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
10254 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
10255 break;
10256 case PIPE_PRIM_LINES:
10257 /**
10258        * Per the SM5 spec, InsideTessFactor is unused for isolines.
10259        * In fact, GLSL's inner tessellation level has no meaning for isolines,
10260        * but if an application tries to read it in the TES when the primitive
10261        * type is isolines, return vec(1.0f) instead of letting the driver
10262        * fault on the access.
10263 */
10264 src = make_immediate_reg_float(emit, 1.0f);
10265 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
10266 break;
10267 default:
10268 break;
10269 }
10270 }
10271
10272 if (emit->tes.outer.tgsi_index != INVALID_INDEX) {
10273 dst = make_dst_temp_reg(emit->tes.outer.temp_index);
10274
10275 switch (emit->tes.prim_mode) {
10276 case PIPE_PRIM_QUADS:
10277 src = make_src_scalar_reg(TGSI_FILE_INPUT,
10278 emit->tes.outer.in_index + 3, TGSI_SWIZZLE_X);
10279 dst = writemask_dst(&dst, TGSI_WRITEMASK_W);
10280 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
10281 /* fallthrough */
10282 case PIPE_PRIM_TRIANGLES:
10283 src = make_src_scalar_reg(TGSI_FILE_INPUT,
10284 emit->tes.outer.in_index + 2, TGSI_SWIZZLE_X);
10285 dst = writemask_dst(&dst, TGSI_WRITEMASK_Z);
10286 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
10287 /* fallthrough */
10288 case PIPE_PRIM_LINES:
10289 src = make_src_scalar_reg(TGSI_FILE_INPUT,
10290 emit->tes.outer.in_index + 1, TGSI_SWIZZLE_X);
10291 dst = writemask_dst(&dst, TGSI_WRITEMASK_Y);
10292 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
10293
10294 src = make_src_scalar_reg(TGSI_FILE_INPUT,
10295 emit->tes.outer.in_index, TGSI_SWIZZLE_X);
10296 dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
10297 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
10298
10299 break;
10300 default:
10301 break;
10302 }
10303 }
10304 }
10305
10306
10307 static void
10308 emit_initialize_temp_instruction(struct svga_shader_emitter_v10 *emit)
10309 {
10310 struct tgsi_full_src_register src;
10311 struct tgsi_full_dst_register dst;
10312 unsigned vgpu10_temp_index = remap_temp_index(emit, TGSI_FILE_TEMPORARY,
10313 emit->initialize_temp_index);
10314 src = make_immediate_reg_float(emit, 0.0f);
10315 dst = make_dst_temp_reg(vgpu10_temp_index);
10316 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
10317 emit->temp_map[emit->initialize_temp_index].initialized = TRUE;
10318 emit->initialize_temp_index = INVALID_INDEX;
10319 }
10320
10321
10322 /**
10323 * Emit any extra/helper declarations/code that we might need between
10324 * the declaration section and code section.
10325 */
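/*
 * Rough order of what is emitted here:
 *   1. per-stage properties/declarations (GS properties, HS/DS declarations)
 *   2. input and output declarations
 *   3. temporary register declarations
 *   4. constants, samplers, resources and immediates (skipped for the TCS,
 *      which already declared them in hs_decls)
 *   5. clip distance declarations (non-FS) and the alpha-ref immediate (FS)
 *   6. per-stage helper instructions: frontface/fragcoord/sample-position
 *      for the FS, vertex attribs and VertexID bias for the VS, temp
 *      tessfactors for the TES, the default control point output for the
 *      TCS, and the prescale temporaries when a single prescale is used
 */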
10326 static boolean
10327 emit_pre_helpers(struct svga_shader_emitter_v10 *emit)
10328 {
10329 /* Properties */
10330 if (emit->unit == PIPE_SHADER_GEOMETRY)
10331 emit_property_instructions(emit);
10332 else if (emit->unit == PIPE_SHADER_TESS_CTRL) {
10333 emit_hull_shader_declarations(emit);
10334
10335 /* Save the position of the first instruction token so that we can
10336 * do a second pass of the instructions for the patch constant phase.
10337 */
10338 emit->tcs.instruction_token_pos = emit->cur_tgsi_token;
10339 emit->tcs.fork_phase_add_signature = FALSE;
10340
10341 if (!emit_hull_shader_control_point_phase(emit)) {
10342 emit->skip_instruction = TRUE;
10343 return TRUE;
10344 }
10345
10346 /* Set the current tcs phase to control point phase */
10347 emit->tcs.control_point_phase = TRUE;
10348 }
10349 else if (emit->unit == PIPE_SHADER_TESS_EVAL) {
10350 emit_domain_shader_declarations(emit);
10351 }
10352
10353 /* Declare inputs */
10354 if (!emit_input_declarations(emit))
10355 return FALSE;
10356
10357 /* Declare outputs */
10358 if (!emit_output_declarations(emit))
10359 return FALSE;
10360
10361 /* Declare temporary registers */
10362 emit_temporaries_declaration(emit);
10363
10364 /* For PIPE_SHADER_TESS_CTRL, constants, samplers, resources and immediates
10365 * will already be declared in hs_decls (emit_hull_shader_declarations)
10366 */
10367 if (emit->unit != PIPE_SHADER_TESS_CTRL) {
10368 /* Declare constant registers */
10369 emit_constant_declaration(emit);
10370
10371 /* Declare samplers and resources */
10372 emit_sampler_declarations(emit);
10373 emit_resource_declarations(emit);
10374
10375 alloc_common_immediates(emit);
10376       /* The constant block containing all the immediates declared by the
10377        * shader, as well as the extra ones allocated above, is emitted below.
10378 */
10379 }
10380
10381 if (emit->unit != PIPE_SHADER_FRAGMENT) {
10382 /*
10383 * Declare clip distance output registers for ClipVertex or
10384 * user defined planes
10385 */
10386 emit_clip_distance_declarations(emit);
10387 }
10388
10389 if (emit->unit == PIPE_SHADER_FRAGMENT &&
10390 emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS) {
10391 float alpha = emit->key.fs.alpha_ref;
10392 emit->fs.alpha_ref_index =
10393 alloc_immediate_float4(emit, alpha, alpha, alpha, alpha);
10394 }
10395
10396 if (emit->unit != PIPE_SHADER_TESS_CTRL) {
10397 /**
10398 * For PIPE_SHADER_TESS_CTRL, immediates are already declared in
10399 * hs_decls
10400 */
10401 emit_vgpu10_immediates_block(emit);
10402 }
10403 else {
10404 emit_tcs_default_control_point_output(emit);
10405 }
10406
10407 if (emit->unit == PIPE_SHADER_FRAGMENT) {
10408 emit_frontface_instructions(emit);
10409 emit_fragcoord_instructions(emit);
10410 emit_sample_position_instructions(emit);
10411 emit_default_layer_instructions(emit);
10412 }
10413 else if (emit->unit == PIPE_SHADER_VERTEX) {
10414 emit_vertex_attrib_instructions(emit);
10415
10416 if (emit->info.uses_vertexid)
10417 emit_vertex_id_nobase_instruction(emit);
10418 }
10419 else if (emit->unit == PIPE_SHADER_TESS_EVAL) {
10420 emit_temp_tessfactor_instructions(emit);
10421 }
10422
10423 /**
10424    * For a geometry shader that writes to the viewport index, the prescale
10425    * temporaries will be set up at the first vertex emission instead.
10426 */
10427 if (emit->vposition.need_prescale && emit->vposition.num_prescale == 1)
10428 emit_temp_prescale_instructions(emit);
10429
10430 return TRUE;
10431 }
10432
10433
10434 /**
10435 * The device has no direct support for the pipe_blend_state::alpha_to_one
10436 * option so we implement it here with shader code.
10437 *
10438 * Note that this is kind of pointless, actually. Here we're clobbering
10439 * the alpha value with 1.0. So if alpha-to-coverage is enabled, we'll wind
10440 * up with 100% coverage. That's almost certainly not what the user wants.
10441 * The work-around is to add extra shader code to compute coverage from alpha
10442 * and write it to the coverage output register (if the user's shader doesn't
10443 * do so already). We'll probably do that in the future.
10444 */
10445 static void
10446 emit_alpha_to_one_instructions(struct svga_shader_emitter_v10 *emit,
10447 unsigned fs_color_tmp_index)
10448 {
10449 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
10450 unsigned i;
10451
10452 /* Note: it's not 100% clear from the spec if we're supposed to clobber
10453 * the alpha for all render targets. But that's what NVIDIA does and
10454 * that's what Piglit tests.
10455 */
10456 for (i = 0; i < emit->fs.num_color_outputs; i++) {
10457 struct tgsi_full_dst_register color_dst;
10458
10459 if (fs_color_tmp_index != INVALID_INDEX && i == 0) {
10460 /* write to the temp color register */
10461 color_dst = make_dst_temp_reg(fs_color_tmp_index);
10462 }
10463 else {
10464 /* write directly to the color[i] output */
10465 color_dst = make_dst_output_reg(emit->fs.color_out_index[i]);
10466 }
10467
10468 color_dst = writemask_dst(&color_dst, TGSI_WRITEMASK_W);
10469
10470 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, &one);
10471 }
10472 }
10473
10474
10475 /**
10476 * Emit alpha test code. This compares TEMP[fs_color_tmp_index].w
10477 * against the alpha reference value and discards the fragment if the
10478 * comparison fails.
10479 */
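/*
 * A sketch of the generated code (illustrative):
 *
 *   tmp.x = (TEMP[fs_color_tmp_index].w  <alpha_func>  alpha_ref)
 *   DISCARD if tmp.x == 0
 *   MOV OUTPUT[color0], TEMP[fs_color_tmp_index]   ; only when no broadcast
 *                                                  ; pass follows
 */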
10480 static void
10481 emit_alpha_test_instructions(struct svga_shader_emitter_v10 *emit,
10482 unsigned fs_color_tmp_index)
10483 {
10484 /* compare output color's alpha to alpha ref and kill */
10485 unsigned tmp = get_temp_index(emit);
10486 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
10487 struct tgsi_full_src_register tmp_src_x =
10488 scalar_src(&tmp_src, TGSI_SWIZZLE_X);
10489 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
10490 struct tgsi_full_src_register color_src =
10491 make_src_temp_reg(fs_color_tmp_index);
10492 struct tgsi_full_src_register color_src_w =
10493 scalar_src(&color_src, TGSI_SWIZZLE_W);
10494 struct tgsi_full_src_register ref_src =
10495 make_src_immediate_reg(emit->fs.alpha_ref_index);
10496 struct tgsi_full_dst_register color_dst =
10497 make_dst_output_reg(emit->fs.color_out_index[0]);
10498
10499 assert(emit->unit == PIPE_SHADER_FRAGMENT);
10500
10501 /* dst = src0 'alpha_func' src1 */
10502 emit_comparison(emit, emit->key.fs.alpha_func, &tmp_dst,
10503 &color_src_w, &ref_src);
10504
10505 /* DISCARD if dst.x == 0 */
10506 begin_emit_instruction(emit);
10507 emit_discard_opcode(emit, FALSE); /* discard if src0.x is zero */
10508 emit_src_register(emit, &tmp_src_x);
10509 end_emit_instruction(emit);
10510
10511 /* If we don't need to broadcast the color below, emit the final color here.
10512 */
10513 if (emit->key.fs.write_color0_to_n_cbufs <= 1) {
10514 /* MOV output.color, tempcolor */
10515 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, &color_src);
10516 }
10517
10518 free_temp_indexes(emit);
10519 }
10520
10521
10522 /**
10523 * Emit instructions for writing a single color output to multiple
10524 * color buffers.
10525  * This is used when the TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS property
10526  * is set (or when key.fs.white_fragments is true) and the number of
10527  * render targets is greater than one.
10528 * \param fs_color_tmp_index index of the temp register that holds the
10529 * color to broadcast.
10530 */
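/*
 * Effectively (illustrative):
 *
 *   src = key.fs.white_fragments ? (1,1,1,1) : TEMP[fs_color_tmp_index]
 *   for i in 0 .. write_color0_to_n_cbufs - 1:
 *       MOV OUTPUT[color_out[i]], src
 */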
10531 static void
10532 emit_broadcast_color_instructions(struct svga_shader_emitter_v10 *emit,
10533 unsigned fs_color_tmp_index)
10534 {
10535 const unsigned n = emit->key.fs.write_color0_to_n_cbufs;
10536 unsigned i;
10537 struct tgsi_full_src_register color_src;
10538
10539 if (emit->key.fs.white_fragments) {
10540 /* set all color outputs to white */
10541 color_src = make_immediate_reg_float(emit, 1.0f);
10542 }
10543 else {
10544 /* set all color outputs to TEMP[fs_color_tmp_index] */
10545 assert(fs_color_tmp_index != INVALID_INDEX);
10546 color_src = make_src_temp_reg(fs_color_tmp_index);
10547 }
10548
10549 assert(emit->unit == PIPE_SHADER_FRAGMENT);
10550
10551 for (i = 0; i < n; i++) {
10552 unsigned output_reg = emit->fs.color_out_index[i];
10553 struct tgsi_full_dst_register color_dst =
10554 make_dst_output_reg(output_reg);
10555
10556 /* Fill in this semantic here since we'll use it later in
10557 * emit_dst_register().
10558 */
10559 emit->info.output_semantic_name[output_reg] = TGSI_SEMANTIC_COLOR;
10560
10561 /* MOV output.color[i], tempcolor */
10562 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, &color_src);
10563 }
10564 }
10565
10566
10567 /**
10568 * Emit extra helper code after the original shader code, but before the
10569 * last END/RET instruction.
10570 * For vertex shaders this means emitting the extra code to apply the
10571 * prescale scale/translation.
10572 */
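/*
 * Per-stage summary of the work done here:
 *   VS/TES: emit the vertex position epilogue (prescale etc.)
 *   FS:     alpha-to-one, alpha test and color broadcast as required by the
 *           compile key, after disabling the color-temp substitution
 *   TCS:    store the tessellation levels (patch constant phase) or emit
 *           the clipping instructions (control point phase)
 */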
10573 static boolean
10574 emit_post_helpers(struct svga_shader_emitter_v10 *emit)
10575 {
10576 if (emit->unit == PIPE_SHADER_VERTEX) {
10577 emit_vertex_instructions(emit);
10578 }
10579 else if (emit->unit == PIPE_SHADER_FRAGMENT) {
10580 const unsigned fs_color_tmp_index = emit->fs.color_tmp_index;
10581
10582 assert(!(emit->key.fs.white_fragments &&
10583 emit->key.fs.write_color0_to_n_cbufs == 0));
10584
10585 /* We no longer want emit_dst_register() to substitute the
10586 * temporary fragment color register for the real color output.
10587 */
10588 emit->fs.color_tmp_index = INVALID_INDEX;
10589
10590 if (emit->key.fs.alpha_to_one) {
10591 emit_alpha_to_one_instructions(emit, fs_color_tmp_index);
10592 }
10593 if (emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS) {
10594 emit_alpha_test_instructions(emit, fs_color_tmp_index);
10595 }
10596 if (emit->key.fs.write_color0_to_n_cbufs > 1 ||
10597 emit->key.fs.white_fragments) {
10598 emit_broadcast_color_instructions(emit, fs_color_tmp_index);
10599 }
10600 }
10601 else if (emit->unit == PIPE_SHADER_TESS_CTRL) {
10602 if (!emit->tcs.control_point_phase) {
10603 /* store the tessellation levels in the patch constant phase only */
10604 store_tesslevels(emit);
10605 }
10606 else {
10607 emit_clipping_instructions(emit);
10608 }
10609 }
10610 else if (emit->unit == PIPE_SHADER_TESS_EVAL) {
10611 emit_vertex_instructions(emit);
10612 }
10613
10614 return TRUE;
10615 }
10616
10617
10618 /**
10619 * Translate the TGSI tokens into VGPU10 tokens.
10620 */
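/*
 * This is a single pass over the TGSI token stream: immediates,
 * declarations and properties are translated as they are encountered, and
 * the extra "pre helper" code is emitted once, just before the first
 * instruction.  For the TCS, a second pass over the saved instruction
 * tokens then generates the patch constant phase.
 */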
10621 static boolean
10622 emit_vgpu10_instructions(struct svga_shader_emitter_v10 *emit,
10623 const struct tgsi_token *tokens)
10624 {
10625 struct tgsi_parse_context parse;
10626 boolean ret = TRUE;
10627 boolean pre_helpers_emitted = FALSE;
10628 unsigned inst_number = 0;
10629
10630 tgsi_parse_init(&parse, tokens);
10631
10632 while (!tgsi_parse_end_of_tokens(&parse)) {
10633
10634 /* Save the current tgsi token starting position */
10635 emit->cur_tgsi_token = parse.Position;
10636
10637 tgsi_parse_token(&parse);
10638
10639 switch (parse.FullToken.Token.Type) {
10640 case TGSI_TOKEN_TYPE_IMMEDIATE:
10641 ret = emit_vgpu10_immediate(emit, &parse.FullToken.FullImmediate);
10642 if (!ret)
10643 goto done;
10644 break;
10645
10646 case TGSI_TOKEN_TYPE_DECLARATION:
10647 ret = emit_vgpu10_declaration(emit, &parse.FullToken.FullDeclaration);
10648 if (!ret)
10649 goto done;
10650 break;
10651
10652 case TGSI_TOKEN_TYPE_INSTRUCTION:
10653 if (!pre_helpers_emitted) {
10654 ret = emit_pre_helpers(emit);
10655 if (!ret)
10656 goto done;
10657 pre_helpers_emitted = TRUE;
10658 }
10659 ret = emit_vgpu10_instruction(emit, inst_number++,
10660 &parse.FullToken.FullInstruction);
10661
10662          /* Usually this applies to the TCS only. If the shader reads control
10663           * point outputs in the control point phase, we must re-emit all
10664           * instructions that write a control point output in the control point
10665           * phase so that the results are also stored in temporaries.
10666 */
10667 if (emit->reemit_instruction) {
10668 assert(emit->unit == PIPE_SHADER_TESS_CTRL);
10669 ret = emit_vgpu10_instruction(emit, inst_number,
10670 &parse.FullToken.FullInstruction);
10671 }
10672 else if (emit->initialize_temp_index != INVALID_INDEX) {
10673 emit_initialize_temp_instruction(emit);
10674 emit->initialize_temp_index = INVALID_INDEX;
10675 ret = emit_vgpu10_instruction(emit, inst_number - 1,
10676 &parse.FullToken.FullInstruction);
10677 }
10678
10679 if (!ret)
10680 goto done;
10681 break;
10682
10683 case TGSI_TOKEN_TYPE_PROPERTY:
10684 ret = emit_vgpu10_property(emit, &parse.FullToken.FullProperty);
10685 if (!ret)
10686 goto done;
10687 break;
10688
10689 default:
10690 break;
10691 }
10692 }
10693
10694 if (emit->unit == PIPE_SHADER_TESS_CTRL) {
10695 ret = emit_hull_shader_patch_constant_phase(emit, &parse);
10696 }
10697
10698 done:
10699 tgsi_parse_free(&parse);
10700 return ret;
10701 }
10702
10703
10704 /**
10705 * Emit the first VGPU10 shader tokens.
10706 */
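/*
 * Layout of the tokens emitted here (sketch):
 *
 *   [0]  version / program-type token
 *   [1]  total shader length in tokens (zero for now, patched later in
 *        emit_vgpu10_tail())
 *   SM5:  HS_DECLS marker (hull shaders only), then DCL_GLOBAL_FLAGS with
 *         enableDoublePrecisionFloatOps set
 *   SM4+: a reserved NOP token, later rewritten to DCL_GLOBAL_FLAGS
 *         (refactoringAllowed) if the shader uses no 'precise' operations
 */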
10707 static boolean
10708 emit_vgpu10_header(struct svga_shader_emitter_v10 *emit)
10709 {
10710 VGPU10ProgramToken ptoken;
10711
10712 /* First token: VGPU10ProgramToken (version info, program type (VS,GS,PS)) */
10713 ptoken.value = 0; /* init whole token to zero */
10714 ptoken.majorVersion = emit->version / 10;
10715 ptoken.minorVersion = emit->version % 10;
10716 ptoken.programType = translate_shader_type(emit->unit);
10717 if (!emit_dword(emit, ptoken.value))
10718 return FALSE;
10719
10720 /* Second token: total length of shader, in tokens. We can't fill this
10721 * in until we're all done. Emit zero for now.
10722 */
10723 if (!emit_dword(emit, 0))
10724 return FALSE;
10725
10726 if (emit->version >= 50) {
10727 VGPU10OpcodeToken0 token;
10728
10729 if (emit->unit == PIPE_SHADER_TESS_CTRL) {
10730 /* For hull shader, we need to start the declarations phase first before
10731 * emitting any declarations including the global flags.
10732 */
10733 token.value = 0;
10734 token.opcodeType = VGPU10_OPCODE_HS_DECLS;
10735 begin_emit_instruction(emit);
10736 emit_dword(emit, token.value);
10737 end_emit_instruction(emit);
10738 }
10739
10740 /* Emit global flags */
10741 token.value = 0; /* init whole token to zero */
10742 token.opcodeType = VGPU10_OPCODE_DCL_GLOBAL_FLAGS;
10743 token.enableDoublePrecisionFloatOps = 1; /* set bit */
10744 token.instructionLength = 1;
10745 if (!emit_dword(emit, token.value))
10746 return FALSE;
10747 }
10748
10749 if (emit->version >= 40) {
10750 VGPU10OpcodeToken0 token;
10751
10752       /* Reserved for a global flag such as refactoringAllowed.
10753        * If the shader does not use the precise qualifier, we will set the
10754        * refactoringAllowed global flag; otherwise, we will leave the reserved
10755        * token as a NOP.
10756 */
10757 emit->reserved_token = (emit->ptr - emit->buf) / sizeof(VGPU10OpcodeToken0);
10758 token.value = 0;
10759 token.opcodeType = VGPU10_OPCODE_NOP;
10760 token.instructionLength = 1;
10761 if (!emit_dword(emit, token.value))
10762 return FALSE;
10763 }
10764
10765 return TRUE;
10766 }
10767
10768
10769 static boolean
10770 emit_vgpu10_tail(struct svga_shader_emitter_v10 *emit)
10771 {
10772 VGPU10ProgramToken *tokens;
10773
10774 /* Replace the second token with total shader length */
10775 tokens = (VGPU10ProgramToken *) emit->buf;
10776 tokens[1].value = emit_get_num_tokens(emit);
10777
10778 if (emit->version >= 40 && !emit->uses_precise_qualifier) {
10779 /* Replace the reserved token with the RefactoringAllowed global flag */
10780 VGPU10OpcodeToken0 *ptoken;
10781
10782 ptoken = (VGPU10OpcodeToken0 *)&tokens[emit->reserved_token];
10783 assert(ptoken->opcodeType == VGPU10_OPCODE_NOP);
10784 ptoken->opcodeType = VGPU10_OPCODE_DCL_GLOBAL_FLAGS;
10785 ptoken->refactoringAllowed = 1;
10786 }
10787
10788 return TRUE;
10789 }
10790
10791
10792 /**
10793 * Modify the FS to read the BCOLORs and use the FACE register
10794 * to choose between the front/back colors.
10795 */
10796 static const struct tgsi_token *
10797 transform_fs_twoside(const struct tgsi_token *tokens)
10798 {
10799 if (0) {
10800 debug_printf("Before tgsi_add_two_side ------------------\n");
10801 tgsi_dump(tokens,0);
10802 }
10803 tokens = tgsi_add_two_side(tokens);
10804 if (0) {
10805 debug_printf("After tgsi_add_two_side ------------------\n");
10806 tgsi_dump(tokens, 0);
10807 }
10808 return tokens;
10809 }
10810
10811
10812 /**
10813 * Modify the FS to do polygon stipple.
10814 */
10815 static const struct tgsi_token *
10816 transform_fs_pstipple(struct svga_shader_emitter_v10 *emit,
10817 const struct tgsi_token *tokens)
10818 {
10819 const struct tgsi_token *new_tokens;
10820 unsigned unit;
10821
10822 if (0) {
10823 debug_printf("Before pstipple ------------------\n");
10824 tgsi_dump(tokens,0);
10825 }
10826
10827 new_tokens = util_pstipple_create_fragment_shader(tokens, &unit, 0,
10828 TGSI_FILE_INPUT);
10829
10830 emit->fs.pstipple_sampler_unit = unit;
10831
10832 /* Setup texture state for stipple */
10833 emit->sampler_target[unit] = TGSI_TEXTURE_2D;
10834 emit->key.tex[unit].swizzle_r = TGSI_SWIZZLE_X;
10835 emit->key.tex[unit].swizzle_g = TGSI_SWIZZLE_Y;
10836 emit->key.tex[unit].swizzle_b = TGSI_SWIZZLE_Z;
10837 emit->key.tex[unit].swizzle_a = TGSI_SWIZZLE_W;
10838
10839 if (0) {
10840 debug_printf("After pstipple ------------------\n");
10841 tgsi_dump(new_tokens, 0);
10842 }
10843
10844 return new_tokens;
10845 }
10846
10847 /**
10848  * Modify the FS to support anti-aliased points.
10849 */
10850 static const struct tgsi_token *
10851 transform_fs_aapoint(const struct tgsi_token *tokens,
10852 int aa_coord_index)
10853 {
10854 if (0) {
10855 debug_printf("Before tgsi_add_aa_point ------------------\n");
10856 tgsi_dump(tokens,0);
10857 }
10858 tokens = tgsi_add_aa_point(tokens, aa_coord_index);
10859 if (0) {
10860 debug_printf("After tgsi_add_aa_point ------------------\n");
10861 tgsi_dump(tokens, 0);
10862 }
10863 return tokens;
10864 }
10865
10866
10867 /**
10868 * A helper function to determine the shader in the previous stage and
10869 * then call the linker function to determine the input mapping for this
10870 * shader to match the output indices from the shader in the previous stage.
10871 */
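/*
 * Previous-stage selection, as implemented below:
 *   FS  <- GS, else TES, else VS
 *   GS  <- TES, else VS
 *   TES <- TCS
 *   TCS <- VS
 *   VS  <- none (only the input map maximum is fixed up)
 */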
10872 static void
10873 compute_input_mapping(struct svga_context *svga,
10874 struct svga_shader_emitter_v10 *emit,
10875 enum pipe_shader_type unit)
10876 {
10877 struct svga_shader *prevShader = NULL; /* shader in the previous stage */
10878
10879 if (unit == PIPE_SHADER_FRAGMENT) {
10880 prevShader = svga->curr.gs ?
10881 &svga->curr.gs->base : (svga->curr.tes ?
10882 &svga->curr.tes->base : &svga->curr.vs->base);
10883 } else if (unit == PIPE_SHADER_GEOMETRY) {
10884 prevShader = svga->curr.tes ? &svga->curr.tes->base : &svga->curr.vs->base;
10885 } else if (unit == PIPE_SHADER_TESS_EVAL) {
10886 assert(svga->curr.tcs);
10887 prevShader = &svga->curr.tcs->base;
10888 } else if (unit == PIPE_SHADER_TESS_CTRL) {
10889 assert(svga->curr.vs);
10890 prevShader = &svga->curr.vs->base;
10891 }
10892
10893 if (prevShader != NULL) {
10894 svga_link_shaders(&prevShader->info, &emit->info, &emit->linkage);
10895 emit->prevShaderInfo = &prevShader->info;
10896 }
10897 else {
10898 /**
10899        * Since the vertex shader does not need to go through the linker to
10900        * establish the input map, we need to make sure the highest input
10901        * register index is set properly here.
10902 */
10903 emit->linkage.input_map_max = MAX2((int)emit->linkage.input_map_max,
10904 emit->info.file_max[TGSI_FILE_INPUT]);
10905 }
10906 }
10907
10908
10909 /**
10910 * Copies the shader signature info to the shader variant
10911 */
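/*
 * The signature blob copied into the variant is laid out contiguously as:
 *
 *   SVGA3dDXShaderSignatureHeader
 *   SVGA3dDXShaderSignatureEntry inputs[numInputSignatures]
 *   SVGA3dDXShaderSignatureEntry outputs[numOutputSignatures]
 *   SVGA3dDXShaderSignatureEntry patchConstants[numPatchConstantSignatures]
 */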
10912 static void
10913 copy_shader_signature(struct svga_shader_signature *sgn,
10914 struct svga_shader_variant *variant)
10915 {
10916 SVGA3dDXShaderSignatureHeader *header = &sgn->header;
10917
10918 /* Calculate the signature length */
10919 variant->signatureLen = sizeof(SVGA3dDXShaderSignatureHeader) +
10920 (header->numInputSignatures +
10921 header->numOutputSignatures +
10922 header->numPatchConstantSignatures) *
10923 sizeof(SVGA3dDXShaderSignatureEntry);
10924
10925 /* Allocate buffer for the signature info */
10926 variant->signature =
10927 (SVGA3dDXShaderSignatureHeader *)CALLOC(1, variant->signatureLen);
10928
10929 char *sgnBuf = (char *)variant->signature;
10930 unsigned sgnLen;
10931
10932 /* Copy the signature info to the shader variant structure */
10933 memcpy(sgnBuf, &sgn->header, sizeof(SVGA3dDXShaderSignatureHeader));
10934 sgnBuf += sizeof(SVGA3dDXShaderSignatureHeader);
10935
10936 if (header->numInputSignatures) {
10937 sgnLen =
10938 header->numInputSignatures * sizeof(SVGA3dDXShaderSignatureEntry);
10939 memcpy(sgnBuf, &sgn->inputs[0], sgnLen);
10940 sgnBuf += sgnLen;
10941 }
10942
10943 if (header->numOutputSignatures) {
10944 sgnLen =
10945 header->numOutputSignatures * sizeof(SVGA3dDXShaderSignatureEntry);
10946 memcpy(sgnBuf, &sgn->outputs[0], sgnLen);
10947 sgnBuf += sgnLen;
10948 }
10949
10950 if (header->numPatchConstantSignatures) {
10951 sgnLen =
10952 header->numPatchConstantSignatures * sizeof(SVGA3dDXShaderSignatureEntry);
10953 memcpy(sgnBuf, &sgn->patchConstants[0], sgnLen);
10954 }
10955 }
10956
10957
10958 /**
10959  * This is the main entrypoint for the TGSI -> VGPU10 translator.
10960 */
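/*
 * High-level flow (see the code below for details):
 *   1. allocate and initialize the emitter; pick the target version
 *      (SM4.0 / SM4.1 / SM5) from the device capabilities
 *   2. for fragment shaders, apply the two-side / pstipple / aa-point TGSI
 *      transforms requested by the compile key
 *   3. scan the (possibly transformed) tokens, compute the input mapping
 *      against the previous stage and determine the clipping mode
 *   4. emit the VGPU10 header, instructions and tail
 *   5. package the result into a svga_shader_variant (including the shader
 *      signature on SM5 devices)
 */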
10961 struct svga_shader_variant *
10962 svga_tgsi_vgpu10_translate(struct svga_context *svga,
10963 const struct svga_shader *shader,
10964 const struct svga_compile_key *key,
10965 enum pipe_shader_type unit)
10966 {
10967 struct svga_shader_variant *variant = NULL;
10968 struct svga_shader_emitter_v10 *emit;
10969 const struct tgsi_token *tokens = shader->tokens;
10970
10971 (void) make_immediate_reg_double; /* unused at this time */
10972
10973 assert(unit == PIPE_SHADER_VERTEX ||
10974 unit == PIPE_SHADER_GEOMETRY ||
10975 unit == PIPE_SHADER_FRAGMENT ||
10976 unit == PIPE_SHADER_TESS_CTRL ||
10977 unit == PIPE_SHADER_TESS_EVAL ||
10978 unit == PIPE_SHADER_COMPUTE);
10979
10980 /* These two flags cannot be used together */
10981 assert(key->vs.need_prescale + key->vs.undo_viewport <= 1);
10982
10983 SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_TGSIVGPU10TRANSLATE);
10984 /*
10985 * Setup the code emitter
10986 */
10987 emit = alloc_emitter();
10988 if (!emit)
10989 goto done;
10990
10991 emit->unit = unit;
10992 if (svga_have_sm5(svga)) {
10993 emit->version = 50;
10994 } else if (svga_have_sm4_1(svga)) {
10995 emit->version = 41;
10996 } else {
10997 emit->version = 40;
10998 }
10999
11000 emit->signature.header.headerVersion = SVGADX_SIGNATURE_HEADER_VERSION_0;
11001
11002 emit->key = *key;
11003
11004 emit->vposition.need_prescale = (emit->key.vs.need_prescale ||
11005 emit->key.gs.need_prescale ||
11006 emit->key.tes.need_prescale);
11007
11008 /* Determine how many prescale factors in the constant buffer */
11009 emit->vposition.num_prescale = 1;
11010 if (emit->vposition.need_prescale && emit->key.gs.writes_viewport_index) {
11011 assert(emit->unit == PIPE_SHADER_GEOMETRY);
11012 emit->vposition.num_prescale = emit->key.gs.num_prescale;
11013 }
11014
11015 emit->vposition.tmp_index = INVALID_INDEX;
11016 emit->vposition.so_index = INVALID_INDEX;
11017 emit->vposition.out_index = INVALID_INDEX;
11018
11019 emit->vs.vertex_id_sys_index = INVALID_INDEX;
11020 emit->vs.vertex_id_tmp_index = INVALID_INDEX;
11021 emit->vs.vertex_id_bias_index = INVALID_INDEX;
11022
11023 emit->fs.color_tmp_index = INVALID_INDEX;
11024 emit->fs.face_input_index = INVALID_INDEX;
11025 emit->fs.fragcoord_input_index = INVALID_INDEX;
11026 emit->fs.sample_id_sys_index = INVALID_INDEX;
11027 emit->fs.sample_pos_sys_index = INVALID_INDEX;
11028 emit->fs.sample_mask_in_sys_index = INVALID_INDEX;
11029 emit->fs.layer_input_index = INVALID_INDEX;
11030 emit->fs.layer_imm_index = INVALID_INDEX;
11031
11032 emit->gs.prim_id_index = INVALID_INDEX;
11033 emit->gs.invocation_id_sys_index = INVALID_INDEX;
11034 emit->gs.viewport_index_out_index = INVALID_INDEX;
11035 emit->gs.viewport_index_tmp_index = INVALID_INDEX;
11036
11037 emit->tcs.vertices_per_patch_index = INVALID_INDEX;
11038 emit->tcs.invocation_id_sys_index = INVALID_INDEX;
11039 emit->tcs.control_point_input_index = INVALID_INDEX;
11040 emit->tcs.control_point_addr_index = INVALID_INDEX;
11041 emit->tcs.control_point_out_index = INVALID_INDEX;
11042 emit->tcs.control_point_tmp_index = INVALID_INDEX;
11043 emit->tcs.control_point_out_count = 0;
11044 emit->tcs.inner.out_index = INVALID_INDEX;
11046 emit->tcs.inner.temp_index = INVALID_INDEX;
11047 emit->tcs.inner.tgsi_index = INVALID_INDEX;
11048 emit->tcs.outer.out_index = INVALID_INDEX;
11049 emit->tcs.outer.temp_index = INVALID_INDEX;
11050 emit->tcs.outer.tgsi_index = INVALID_INDEX;
11051 emit->tcs.patch_generic_out_count = 0;
11052 emit->tcs.patch_generic_out_index = INVALID_INDEX;
11053 emit->tcs.patch_generic_tmp_index = INVALID_INDEX;
11054 emit->tcs.prim_id_index = INVALID_INDEX;
11055
11056 emit->tes.tesscoord_sys_index = INVALID_INDEX;
11057 emit->tes.inner.in_index = INVALID_INDEX;
11058 emit->tes.inner.temp_index = INVALID_INDEX;
11059 emit->tes.inner.tgsi_index = INVALID_INDEX;
11060 emit->tes.outer.in_index = INVALID_INDEX;
11061 emit->tes.outer.temp_index = INVALID_INDEX;
11062 emit->tes.outer.tgsi_index = INVALID_INDEX;
11063 emit->tes.prim_id_index = INVALID_INDEX;
11064
11065 emit->clip_dist_out_index = INVALID_INDEX;
11066 emit->clip_dist_tmp_index = INVALID_INDEX;
11067 emit->clip_dist_so_index = INVALID_INDEX;
11068 emit->clip_vertex_out_index = INVALID_INDEX;
11069 emit->clip_vertex_tmp_index = INVALID_INDEX;
11070 emit->svga_debug_callback = svga->debug.callback;
11071
11072 emit->index_range.start_index = INVALID_INDEX;
11073 emit->index_range.count = 0;
11074 emit->index_range.required = FALSE;
11075 emit->index_range.operandType = VGPU10_NUM_OPERANDS;
11076 emit->index_range.dim = 0;
11077 emit->index_range.size = 0;
11078
11079 emit->current_loop_depth = 0;
11080
11081 emit->initialize_temp_index = INVALID_INDEX;
11082
11083 if (emit->key.fs.alpha_func == SVGA3D_CMP_INVALID) {
11084 emit->key.fs.alpha_func = SVGA3D_CMP_ALWAYS;
11085 }
11086
11087 if (unit == PIPE_SHADER_FRAGMENT) {
11088 if (key->fs.light_twoside) {
11089 tokens = transform_fs_twoside(tokens);
11090 }
11091 if (key->fs.pstipple) {
11092 const struct tgsi_token *new_tokens =
11093 transform_fs_pstipple(emit, tokens);
11094 if (tokens != shader->tokens) {
11095 /* free the two-sided shader tokens */
11096 tgsi_free_tokens(tokens);
11097 }
11098 tokens = new_tokens;
11099 }
11100 if (key->fs.aa_point) {
11101 tokens = transform_fs_aapoint(tokens, key->fs.aa_point_coord_index);
11102 }
11103 }
11104
11105 if (SVGA_DEBUG & DEBUG_TGSI) {
11106 debug_printf("#####################################\n");
11107 debug_printf("### TGSI Shader %u\n", shader->id);
11108 tgsi_dump(tokens, 0);
11109 }
11110
11111 /**
11112 * Rescan the header if the token string is different from the one
11113 * included in the shader; otherwise, the header info is already up-to-date
11114 */
11115 if (tokens != shader->tokens) {
11116 tgsi_scan_shader(tokens, &emit->info);
11117 } else {
11118 emit->info = shader->info;
11119 }
11120
11121 emit->num_outputs = emit->info.num_outputs;
11122
11123 /**
11124  * Compute the input mapping to match the outputs from the shader
11125  * in the previous stage.
11126 */
11127 compute_input_mapping(svga, emit, unit);
11128
11129 determine_clipping_mode(emit);
11130
11131 if (unit == PIPE_SHADER_GEOMETRY || unit == PIPE_SHADER_VERTEX ||
11132 unit == PIPE_SHADER_TESS_CTRL || unit == PIPE_SHADER_TESS_EVAL) {
11133 if (shader->stream_output != NULL || emit->clip_mode == CLIP_DISTANCE) {
11134       /* If there are stream output declarations associated
11135        * with this shader, or the shader writes to ClipDistance,
11136        * then reserve extra registers for the non-adjusted vertex position
11137        * and the ClipDistance shadow copy.
11138 */
11139 emit->vposition.so_index = emit->num_outputs++;
11140
11141 if (emit->clip_mode == CLIP_DISTANCE) {
11142 emit->clip_dist_so_index = emit->num_outputs++;
11143 if (emit->info.num_written_clipdistance > 4)
11144 emit->num_outputs++;
11145 }
11146 }
11147 }
11148
11149 /*
11150 * Do actual shader translation.
11151 */
11152 if (!emit_vgpu10_header(emit)) {
11153 debug_printf("svga: emit VGPU10 header failed\n");
11154 goto cleanup;
11155 }
11156
11157 if (!emit_vgpu10_instructions(emit, tokens)) {
11158 debug_printf("svga: emit VGPU10 instructions failed\n");
11159 goto cleanup;
11160 }
11161
11162 if (!emit_vgpu10_tail(emit)) {
11163 debug_printf("svga: emit VGPU10 tail failed\n");
11164 goto cleanup;
11165 }
11166
11167 if (emit->register_overflow) {
11168 goto cleanup;
11169 }
11170
11171 /*
11172 * Create, initialize the 'variant' object.
11173 */
11174 variant = svga_new_shader_variant(svga, unit);
11175 if (!variant)
11176 goto cleanup;
11177
11178 variant->shader = shader;
11179 variant->nr_tokens = emit_get_num_tokens(emit);
11180 variant->tokens = (const unsigned *)emit->buf;
11181
11182 /* Copy shader signature info to the shader variant */
11183 if (svga_have_sm5(svga)) {
11184 copy_shader_signature(&emit->signature, variant);
11185 }
11186
11187 emit->buf = NULL; /* buffer is no longer owned by the emitter context */
11188 memcpy(&variant->key, key, sizeof(*key));
11189 variant->id = UTIL_BITMASK_INVALID_INDEX;
11190
11191    /* The starting offset of the extra constants is the number of
11192     * constants declared in the shader.
11193 */
11194 variant->extra_const_start = emit->num_shader_consts[0];
11195 if (key->gs.wide_point) {
11196 /**
11197 * The extra constant added in the transformed shader
11198 * for inverse viewport scale is to be supplied by the driver.
11199 * So the extra constant starting offset needs to be reduced by 1.
11200 */
11201 assert(variant->extra_const_start > 0);
11202 variant->extra_const_start--;
11203 }
11204
11205 if (unit == PIPE_SHADER_FRAGMENT) {
11206 struct svga_fs_variant *fs_variant = svga_fs_variant(variant);
11207
11208 fs_variant->pstipple_sampler_unit = emit->fs.pstipple_sampler_unit;
11209
11210 /* If there was exactly one write to a fragment shader output register
11211 * and it came from a constant buffer, we know all fragments will have
11212 * the same color (except for blending).
11213 */
11214 fs_variant->constant_color_output =
11215 emit->constant_color_output && emit->num_output_writes == 1;
11216
11217       /** Keep track in the variant whether flat interpolation is used
11218        * for any of the varyings.
11219 */
11220 fs_variant->uses_flat_interp = emit->uses_flat_interp;
11221
11222 fs_variant->fs_shadow_compare_units = emit->fs.shadow_compare_units;
11223 }
11224 else if (unit == PIPE_SHADER_TESS_EVAL) {
11225 struct svga_tes_variant *tes_variant = svga_tes_variant(variant);
11226
11227 /* Keep track in the tes variant some of the layout parameters.
11228 * These parameters will be referenced by the tcs to emit
11229 * the necessary declarations for the hull shader.
11230 */
11231 tes_variant->prim_mode = emit->tes.prim_mode;
11232 tes_variant->spacing = emit->tes.spacing;
11233 tes_variant->vertices_order_cw = emit->tes.vertices_order_cw;
11234 tes_variant->point_mode = emit->tes.point_mode;
11235 }
11236
11237
11238 if (tokens != shader->tokens) {
11239 tgsi_free_tokens(tokens);
11240 }
11241
11242 cleanup:
11243 free_emitter(emit);
11244
11245 done:
11246 SVGA_STATS_TIME_POP(svga_sws(svga));
11247 return variant;
11248 }