1 /**********************************************************
2 * Copyright 1998-2013 VMware, Inc. All rights reserved.
4 * Permission is hereby granted, free of charge, to any person
5 * obtaining a copy of this software and associated documentation
6 * files (the "Software"), to deal in the Software without
7 * restriction, including without limitation the rights to use, copy,
8 * modify, merge, publish, distribute, sublicense, and/or sell copies
9 * of the Software, and to permit persons to whom the Software is
10 * furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice shall be
13 * included in all copies or substantial portions of the Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24 **********************************************************/
27 * @file svga_tgsi_vgpu10.c
29 * TGSI -> VGPU10 shader translation.
31 * \author Mingcheng Chen
35 #include "pipe/p_compiler.h"
36 #include "pipe/p_shader_tokens.h"
37 #include "pipe/p_defines.h"
38 #include "tgsi/tgsi_build.h"
39 #include "tgsi/tgsi_dump.h"
40 #include "tgsi/tgsi_info.h"
41 #include "tgsi/tgsi_parse.h"
42 #include "tgsi/tgsi_scan.h"
43 #include "tgsi/tgsi_strings.h"
44 #include "tgsi/tgsi_two_side.h"
45 #include "tgsi/tgsi_aa_point.h"
46 #include "tgsi/tgsi_util.h"
47 #include "util/u_math.h"
48 #include "util/u_memory.h"
49 #include "util/u_bitmask.h"
50 #include "util/u_debug.h"
51 #include "util/u_pstipple.h"
53 #include "svga_context.h"
54 #include "svga_debug.h"
55 #include "svga_link.h"
56 #include "svga_shader.h"
57 #include "svga_tgsi.h"
59 #include "VGPU10ShaderTokens.h"
/* Marker value for an index that has not been assigned
 * (NOTE(review): meaning inferred from the name -- confirm at use sites).
 */
#define INVALID_INDEX         99999

/* Fixed capacities of the emitter's bookkeeping tables. */
#define MAX_INTERNAL_TEMPS    3
#define MAX_SYSTEM_VALUES     4

/* The emitter stores immediates as rows of four values, so the row
 * count is one quarter of the element count.
 */
#define MAX_IMMEDIATE_COUNT \
        (VGPU10_MAX_IMMEDIATE_CONSTANT_BUFFER_ELEMENT_COUNT/4)

#define MAX_TEMP_ARRAYS       64  /* Enough? */
/**
 * Clipping is complicated.  There are four different cases which we
 * handle during VS/GS shader translation:
 */
enum clipping_mode
{
   /** No clipping enabled. */
   CLIP_NONE = 0,

   /**
    * The shader has no clipping declarations or code, but one or more
    * user-defined clip planes are enabled.  We generate extra code to
    * emit clip distances.
    */
   CLIP_LEGACY,

   /**
    * The shader already declares clip distance output registers and
    * has code to write to them.
    */
   CLIP_DISTANCE,

   /**
    * The shader declares a clip vertex output register and has code
    * that writes to the register.  We convert the clipvertex position
    * into one or more clip distances.
    */
   CLIP_VERTEX
};
91 /* Shader signature info */
92 struct svga_shader_signature
94 SVGA3dDXShaderSignatureHeader header
;
95 SVGA3dDXShaderSignatureEntry inputs
[PIPE_MAX_SHADER_INPUTS
];
96 SVGA3dDXShaderSignatureEntry outputs
[PIPE_MAX_SHADER_OUTPUTS
];
97 SVGA3dDXShaderSignatureEntry patchConstants
[PIPE_MAX_SHADER_OUTPUTS
];
101 set_shader_signature_entry(SVGA3dDXShaderSignatureEntry
*e
,
103 SVGA3dDXSignatureSemanticName sgnName
,
105 SVGA3dDXSignatureRegisterComponentType compType
,
106 SVGA3dDXSignatureMinPrecision minPrecision
)
108 e
->registerIndex
= index
;
109 e
->semanticName
= sgnName
;
111 e
->componentType
= compType
;
112 e
->minPrecision
= minPrecision
;
115 static const SVGA3dDXSignatureSemanticName
116 tgsi_semantic_to_sgn_name
[TGSI_SEMANTIC_COUNT
] = {
117 SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION
,
118 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED
,
119 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED
,
120 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED
,
121 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED
,
122 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED
,
123 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED
,
124 SVGADX_SIGNATURE_SEMANTIC_NAME_IS_FRONT_FACE
,
125 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED
,
126 SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID
,
127 SVGADX_SIGNATURE_SEMANTIC_NAME_INSTANCE_ID
,
128 SVGADX_SIGNATURE_SEMANTIC_NAME_VERTEX_ID
,
129 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED
,
130 SVGADX_SIGNATURE_SEMANTIC_NAME_CLIP_DISTANCE
,
131 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED
,
132 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED
,
133 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED
,
134 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED
,
135 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED
,
136 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED
,
137 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED
,
138 SVGADX_SIGNATURE_SEMANTIC_NAME_VIEWPORT_ARRAY_INDEX
,
139 SVGADX_SIGNATURE_SEMANTIC_NAME_RENDER_TARGET_ARRAY_INDEX
,
140 SVGADX_SIGNATURE_SEMANTIC_NAME_SAMPLE_INDEX
,
141 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED
,
142 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED
,
143 SVGADX_SIGNATURE_SEMANTIC_NAME_INSTANCE_ID
,
144 SVGADX_SIGNATURE_SEMANTIC_NAME_VERTEX_ID
,
145 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED
,
146 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED
,
147 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED
,
148 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED
,
149 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED
,
150 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED
,
151 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED
,
152 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED
,
153 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED
,
154 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED
,
155 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED
,
156 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED
,
157 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED
,
158 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED
,
159 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED
,
160 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED
,
161 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED
166 * Map tgsi semantic name to SVGA signature semantic name
168 static inline SVGA3dDXSignatureSemanticName
169 map_tgsi_semantic_to_sgn_name(enum tgsi_semantic name
)
171 assert(name
< TGSI_SEMANTIC_COUNT
);
173 /* Do a few asserts here to spot check the mapping */
174 assert(tgsi_semantic_to_sgn_name
[TGSI_SEMANTIC_PRIMID
] ==
175 SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID
);
176 assert(tgsi_semantic_to_sgn_name
[TGSI_SEMANTIC_VIEWPORT_INDEX
] ==
177 SVGADX_SIGNATURE_SEMANTIC_NAME_VIEWPORT_ARRAY_INDEX
);
178 assert(tgsi_semantic_to_sgn_name
[TGSI_SEMANTIC_INVOCATIONID
] ==
179 SVGADX_SIGNATURE_SEMANTIC_NAME_INSTANCE_ID
);
181 return tgsi_semantic_to_sgn_name
[name
];
185 struct svga_shader_emitter_v10
187 /* The token output buffer */
192 /* Information about the shader and state (does not change) */
193 struct svga_compile_key key
;
194 struct tgsi_shader_info info
;
196 unsigned version
; /**< Either 40 or 41 at this time */
198 unsigned cur_tgsi_token
; /**< current tgsi token position */
199 unsigned inst_start_token
;
200 boolean discard_instruction
; /**< throw away current instruction? */
201 boolean reemit_instruction
; /**< reemit current instruction */
202 boolean skip_instruction
; /**< skip current instruction */
204 union tgsi_immediate_data immediates
[MAX_IMMEDIATE_COUNT
][4];
205 double (*immediates_dbl
)[2];
206 unsigned num_immediates
; /**< Number of immediates emitted */
207 unsigned common_immediate_pos
[10]; /**< literals for common immediates */
208 unsigned num_common_immediates
;
209 boolean immediates_emitted
;
211 unsigned num_outputs
; /**< include any extra outputs */
212 /** The first extra output is reserved for
213 * non-adjusted vertex position for
214 * stream output purpose
217 /* Temporary Registers */
218 unsigned num_shader_temps
; /**< num of temps used by original shader */
219 unsigned internal_temp_count
; /**< currently allocated internal temps */
221 unsigned start
, size
;
222 } temp_arrays
[MAX_TEMP_ARRAYS
];
223 unsigned num_temp_arrays
;
225 /** Map TGSI temp registers to VGPU10 temp array IDs and indexes */
227 unsigned arrayId
, index
;
229 } temp_map
[VGPU10_MAX_TEMPS
]; /**< arrayId, element */
231 unsigned initialize_temp_index
;
233 /** Number of constants used by original shader for each constant buffer.
234 * The size should probably always match with that of svga_state.constbufs.
236 unsigned num_shader_consts
[SVGA_MAX_CONST_BUFS
];
239 unsigned num_samplers
;
240 boolean sampler_view
[PIPE_MAX_SAMPLERS
]; /**< True if sampler view exists*/
241 ubyte sampler_target
[PIPE_MAX_SAMPLERS
]; /**< TGSI_TEXTURE_x */
242 ubyte sampler_return_type
[PIPE_MAX_SAMPLERS
]; /**< TGSI_RETURN_TYPE_x */
244 /* Index Range declaration */
246 unsigned start_index
;
249 unsigned operandType
;
254 /* Address regs (really implemented with temps) */
255 unsigned num_address_regs
;
256 unsigned address_reg_index
[MAX_VGPU10_ADDR_REGS
];
258 /* Output register usage masks */
259 ubyte output_usage_mask
[PIPE_MAX_SHADER_OUTPUTS
];
261 /* To map TGSI system value index to VGPU shader input indexes */
262 ubyte system_value_indexes
[MAX_SYSTEM_VALUES
];
265 /* vertex position scale/translation */
266 unsigned out_index
; /**< the real position output reg */
267 unsigned tmp_index
; /**< the fake/temp position output reg */
268 unsigned so_index
; /**< the non-adjusted position output reg */
269 unsigned prescale_cbuf_index
; /* index to the const buf for prescale */
270 unsigned prescale_scale_index
, prescale_trans_index
;
271 unsigned num_prescale
; /* number of prescale factor in const buf */
272 unsigned viewport_index
;
273 unsigned need_prescale
:1;
274 unsigned have_prescale
:1;
277 /* For vertex shaders only */
279 /* viewport constant */
280 unsigned viewport_index
;
282 unsigned vertex_id_bias_index
;
283 unsigned vertex_id_sys_index
;
284 unsigned vertex_id_tmp_index
;
286 /* temp index of adjusted vertex attributes */
287 unsigned adjusted_input
[PIPE_MAX_SHADER_INPUTS
];
290 /* For fragment shaders only */
292 unsigned color_out_index
[PIPE_MAX_COLOR_BUFS
]; /**< the real color output regs */
293 unsigned num_color_outputs
;
294 unsigned color_tmp_index
; /**< fake/temp color output reg */
295 unsigned alpha_ref_index
; /**< immediate constant for alpha ref */
298 unsigned face_input_index
; /**< real fragment shader face reg (bool) */
299 unsigned face_tmp_index
; /**< temp face reg converted to -1 / +1 */
301 unsigned pstipple_sampler_unit
;
303 unsigned fragcoord_input_index
; /**< real fragment position input reg */
304 unsigned fragcoord_tmp_index
; /**< 1/w modified position temp reg */
306 unsigned sample_id_sys_index
; /**< TGSI index of sample id sys value */
308 unsigned sample_pos_sys_index
; /**< TGSI index of sample pos sys value */
309 unsigned sample_pos_tmp_index
; /**< which temp reg has the sample pos */
311 /** TGSI index of sample mask input sys value */
312 unsigned sample_mask_in_sys_index
;
314 /** Which texture units are doing shadow comparison in the FS code */
315 unsigned shadow_compare_units
;
318 unsigned layer_input_index
; /**< TGSI index of layer */
319 unsigned layer_imm_index
; /**< immediate for default layer 0 */
322 /* For geometry shaders only */
324 VGPU10_PRIMITIVE prim_type
;/**< VGPU10 primitive type */
325 VGPU10_PRIMITIVE_TOPOLOGY prim_topology
; /**< VGPU10 primitive topology */
326 unsigned input_size
; /**< size of input arrays */
327 unsigned prim_id_index
; /**< primitive id register index */
328 unsigned max_out_vertices
; /**< maximum number of output vertices */
329 unsigned invocations
;
330 unsigned invocation_id_sys_index
;
332 unsigned viewport_index_out_index
;
333 unsigned viewport_index_tmp_index
;
336 /* For tessellation control shaders only */
338 unsigned vertices_per_patch_index
; /**< vertices_per_patch system value index */
339 unsigned imm_index
; /**< immediate for tcs */
340 unsigned invocation_id_sys_index
; /**< invocation id */
341 unsigned invocation_id_tmp_index
;
342 unsigned instruction_token_pos
; /* token pos for the first instruction */
343 unsigned control_point_input_index
; /* control point input register index */
344 unsigned control_point_addr_index
; /* control point input address register */
345 unsigned control_point_out_index
; /* control point output register index */
346 unsigned control_point_tmp_index
; /* control point temporary register */
347 unsigned control_point_out_count
; /* control point output count */
348 boolean control_point_phase
; /* true if in control point phase */
349 boolean fork_phase_add_signature
; /* true if needs to add signature in fork phase */
350 unsigned patch_generic_out_count
; /* per-patch generic output count */
351 unsigned patch_generic_out_index
; /* per-patch generic output register index*/
352 unsigned patch_generic_tmp_index
; /* per-patch generic temporary register index*/
353 unsigned prim_id_index
; /* primitive id */
355 unsigned out_index
; /* real tessinner output register */
356 unsigned temp_index
; /* tessinner temp register */
357 unsigned tgsi_index
; /* tgsi tessinner output register */
360 unsigned out_index
; /* real tessouter output register */
361 unsigned temp_index
; /* tessouter temp register */
362 unsigned tgsi_index
; /* tgsi tessouter output register */
366 /* For tessellation evaluation shaders only */
368 enum pipe_prim_type prim_mode
;
369 enum pipe_tess_spacing spacing
;
370 boolean vertices_order_cw
;
372 unsigned tesscoord_sys_index
;
373 unsigned prim_id_index
; /* primitive id */
375 unsigned in_index
; /* real tessinner input register */
376 unsigned temp_index
; /* tessinner temp register */
377 unsigned tgsi_index
; /* tgsi tessinner input register */
380 unsigned in_index
; /* real tessouter input register */
381 unsigned temp_index
; /* tessouter temp register */
382 unsigned tgsi_index
; /* tgsi tessouter input register */
386 /* For vertex or geometry shaders */
387 enum clipping_mode clip_mode
;
388 unsigned clip_dist_out_index
; /**< clip distance output register index */
389 unsigned clip_dist_tmp_index
; /**< clip distance temporary register */
390 unsigned clip_dist_so_index
; /**< clip distance shadow copy */
392 /** Index of temporary holding the clipvertex coordinate */
393 unsigned clip_vertex_out_index
; /**< clip vertex output register index */
394 unsigned clip_vertex_tmp_index
; /**< clip vertex temporary index */
396 /* user clip plane constant slot indexes */
397 unsigned clip_plane_const
[PIPE_MAX_CLIP_PLANES
];
399 unsigned num_output_writes
;
400 boolean constant_color_output
;
402 boolean uses_flat_interp
;
404 unsigned reserved_token
; /* index to the reserved token */
405 boolean uses_precise_qualifier
;
407 /* For all shaders: const reg index for RECT coord scaling */
408 unsigned texcoord_scale_index
[PIPE_MAX_SAMPLERS
];
410 /* For all shaders: const reg index for texture buffer size */
411 unsigned texture_buffer_size_index
[PIPE_MAX_SAMPLERS
];
413 /* VS/TCS/TES/GS/FS Linkage info */
414 struct shader_linkage linkage
;
415 struct tgsi_shader_info
*prevShaderInfo
;
417 /* Shader signature */
418 struct svga_shader_signature signature
;
420 bool register_overflow
; /**< Set if we exceed a VGPU10 register limit */
422 /* For pipe_debug_message */
423 struct pipe_debug_callback svga_debug_callback
;
425 /* current loop depth in shader */
426 unsigned current_loop_depth
;
430 static void emit_tcs_input_declarations(struct svga_shader_emitter_v10
*emit
);
431 static void emit_tcs_output_declarations(struct svga_shader_emitter_v10
*emit
);
432 static boolean
emit_temporaries_declaration(struct svga_shader_emitter_v10
*emit
);
433 static boolean
emit_constant_declaration(struct svga_shader_emitter_v10
*emit
);
434 static boolean
emit_sampler_declarations(struct svga_shader_emitter_v10
*emit
);
435 static boolean
emit_resource_declarations(struct svga_shader_emitter_v10
*emit
);
436 static boolean
emit_vgpu10_immediates_block(struct svga_shader_emitter_v10
*emit
);
437 static boolean
emit_index_range_declaration(struct svga_shader_emitter_v10
*emit
);
438 static void emit_temp_prescale_instructions(struct svga_shader_emitter_v10
*emit
);
441 emit_post_helpers(struct svga_shader_emitter_v10
*emit
);
444 emit_vertex(struct svga_shader_emitter_v10
*emit
,
445 const struct tgsi_full_instruction
*inst
);
448 emit_vgpu10_instruction(struct svga_shader_emitter_v10
*emit
,
449 unsigned inst_number
,
450 const struct tgsi_full_instruction
*inst
);
453 emit_input_declaration(struct svga_shader_emitter_v10
*emit
,
454 unsigned opcodeType
, unsigned operandType
,
455 unsigned dim
, unsigned index
, unsigned size
,
456 unsigned name
, unsigned numComp
,
457 unsigned selMode
, unsigned usageMask
,
459 boolean addSignature
,
460 SVGA3dDXSignatureSemanticName sgnName
);
463 create_temp_array(struct svga_shader_emitter_v10
*emit
,
464 unsigned arrayID
, unsigned first
, unsigned count
,
465 unsigned startIndex
);
467 static char err_buf
[128];
470 expand(struct svga_shader_emitter_v10
*emit
)
473 unsigned newsize
= emit
->size
* 2;
475 if (emit
->buf
!= err_buf
)
476 new_buf
= REALLOC(emit
->buf
, emit
->size
, newsize
);
483 emit
->size
= sizeof(err_buf
);
487 emit
->size
= newsize
;
488 emit
->ptr
= new_buf
+ (emit
->ptr
- emit
->buf
);
494 * Create and initialize a new svga_shader_emitter_v10 object.
496 static struct svga_shader_emitter_v10
*
499 struct svga_shader_emitter_v10
*emit
= CALLOC(1, sizeof(*emit
));
504 /* to initialize the output buffer */
514 * Free an svga_shader_emitter_v10 object.
517 free_emitter(struct svga_shader_emitter_v10
*emit
)
520 FREE(emit
->buf
); /* will be NULL if translation succeeded */
524 static inline boolean
525 reserve(struct svga_shader_emitter_v10
*emit
,
528 while (emit
->ptr
- emit
->buf
+ nr_dwords
* sizeof(uint32
) >= emit
->size
) {
537 emit_dword(struct svga_shader_emitter_v10
*emit
, uint32 dword
)
539 if (!reserve(emit
, 1))
542 *(uint32
*)emit
->ptr
= dword
;
543 emit
->ptr
+= sizeof dword
;
548 emit_dwords(struct svga_shader_emitter_v10
*emit
,
549 const uint32
*dwords
,
552 if (!reserve(emit
, nr
))
555 memcpy(emit
->ptr
, dwords
, nr
* sizeof *dwords
);
556 emit
->ptr
+= nr
* sizeof *dwords
;
560 /** Return the number of tokens in the emitter's buffer */
562 emit_get_num_tokens(const struct svga_shader_emitter_v10
*emit
)
564 return (emit
->ptr
- emit
->buf
) / sizeof(unsigned);
569 * Check for register overflow. If we overflow we'll set an
570 * error flag. This function can be called for register declarations
571 * or use as src/dst instruction operands.
572 * \param type register type. One of VGPU10_OPERAND_TYPE_x
573 or VGPU10_OPCODE_DCL_x
574 * \param index the register index
577 check_register_index(struct svga_shader_emitter_v10
*emit
,
578 unsigned operandType
, unsigned index
)
580 bool overflow_before
= emit
->register_overflow
;
582 switch (operandType
) {
583 case VGPU10_OPERAND_TYPE_TEMP
:
584 case VGPU10_OPERAND_TYPE_INDEXABLE_TEMP
:
585 case VGPU10_OPCODE_DCL_TEMPS
:
586 if (index
>= VGPU10_MAX_TEMPS
) {
587 emit
->register_overflow
= TRUE
;
590 case VGPU10_OPERAND_TYPE_CONSTANT_BUFFER
:
591 case VGPU10_OPCODE_DCL_CONSTANT_BUFFER
:
592 if (index
>= VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT
) {
593 emit
->register_overflow
= TRUE
;
596 case VGPU10_OPERAND_TYPE_INPUT
:
597 case VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID
:
598 case VGPU10_OPCODE_DCL_INPUT
:
599 case VGPU10_OPCODE_DCL_INPUT_SGV
:
600 case VGPU10_OPCODE_DCL_INPUT_SIV
:
601 case VGPU10_OPCODE_DCL_INPUT_PS
:
602 case VGPU10_OPCODE_DCL_INPUT_PS_SGV
:
603 case VGPU10_OPCODE_DCL_INPUT_PS_SIV
:
604 if ((emit
->unit
== PIPE_SHADER_VERTEX
&&
605 index
>= VGPU10_MAX_VS_INPUTS
) ||
606 (emit
->unit
== PIPE_SHADER_GEOMETRY
&&
607 index
>= VGPU10_MAX_GS_INPUTS
) ||
608 (emit
->unit
== PIPE_SHADER_FRAGMENT
&&
609 index
>= VGPU10_MAX_FS_INPUTS
) ||
610 (emit
->unit
== PIPE_SHADER_TESS_CTRL
&&
611 index
>= VGPU11_MAX_HS_INPUT_CONTROL_POINTS
) ||
612 (emit
->unit
== PIPE_SHADER_TESS_EVAL
&&
613 index
>= VGPU11_MAX_DS_INPUT_CONTROL_POINTS
)) {
614 emit
->register_overflow
= TRUE
;
617 case VGPU10_OPERAND_TYPE_OUTPUT
:
618 case VGPU10_OPCODE_DCL_OUTPUT
:
619 case VGPU10_OPCODE_DCL_OUTPUT_SGV
:
620 case VGPU10_OPCODE_DCL_OUTPUT_SIV
:
621 /* Note: we are skipping two output indices in tcs for
622 * tessinner/outer levels. Implementation will not exceed
623 * number of output count but it allows index to go beyond
624 * VGPU11_MAX_HS_OUTPUTS.
625 * Index will never be >= index >= VGPU11_MAX_HS_OUTPUTS + 2
627 if ((emit
->unit
== PIPE_SHADER_VERTEX
&&
628 index
>= VGPU10_MAX_VS_OUTPUTS
) ||
629 (emit
->unit
== PIPE_SHADER_GEOMETRY
&&
630 index
>= VGPU10_MAX_GS_OUTPUTS
) ||
631 (emit
->unit
== PIPE_SHADER_FRAGMENT
&&
632 index
>= VGPU10_MAX_FS_OUTPUTS
) ||
633 (emit
->unit
== PIPE_SHADER_TESS_CTRL
&&
634 index
>= VGPU11_MAX_HS_OUTPUTS
+ 2) ||
635 (emit
->unit
== PIPE_SHADER_TESS_EVAL
&&
636 index
>= VGPU11_MAX_DS_OUTPUTS
)) {
637 emit
->register_overflow
= TRUE
;
640 case VGPU10_OPERAND_TYPE_SAMPLER
:
641 case VGPU10_OPCODE_DCL_SAMPLER
:
642 if (index
>= VGPU10_MAX_SAMPLERS
) {
643 emit
->register_overflow
= TRUE
;
646 case VGPU10_OPERAND_TYPE_RESOURCE
:
647 case VGPU10_OPCODE_DCL_RESOURCE
:
648 if (index
>= VGPU10_MAX_RESOURCES
) {
649 emit
->register_overflow
= TRUE
;
652 case VGPU10_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER
:
653 if (index
>= MAX_IMMEDIATE_COUNT
) {
654 emit
->register_overflow
= TRUE
;
657 case VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK
:
665 if (emit
->register_overflow
&& !overflow_before
) {
666 debug_printf("svga: vgpu10 register overflow (reg %u, index %u)\n",
673 * Examine misc state to determine the clipping mode.
676 determine_clipping_mode(struct svga_shader_emitter_v10
*emit
)
678 /* num_written_clipdistance in the shader info for tessellation
679 * control shader is always 0 because the TGSI_PROPERTY_NUM_CLIPDIST_ENABLED
680 * is not defined for this shader. So we go through all the output declarations
681 * to set the num_written_clipdistance. This is just to determine the
684 if (emit
->unit
== PIPE_SHADER_TESS_CTRL
) {
686 for (i
= 0; i
< emit
->info
.num_outputs
; i
++) {
687 if (emit
->info
.output_semantic_name
[i
] == TGSI_SEMANTIC_CLIPDIST
) {
688 emit
->info
.num_written_clipdistance
=
689 4 * (emit
->info
.output_semantic_index
[i
] + 1);
694 if (emit
->info
.num_written_clipdistance
> 0) {
695 emit
->clip_mode
= CLIP_DISTANCE
;
697 else if (emit
->info
.writes_clipvertex
) {
698 emit
->clip_mode
= CLIP_VERTEX
;
700 else if (emit
->key
.clip_plane_enable
&& emit
->key
.last_vertex_stage
) {
702 * Only the last shader in the vertex processing stage needs to
703 * handle the legacy clip mode.
705 emit
->clip_mode
= CLIP_LEGACY
;
708 emit
->clip_mode
= CLIP_NONE
;
714 * For clip distance register declarations and clip distance register
715 * writes we need to mask the declaration usage or instruction writemask
716 * (respectively) against the set of the really-enabled clipping planes.
718 * The piglit test spec/glsl-1.30/execution/clipping/vs-clip-distance-enables
719 * has a VS that writes to all 8 clip distance registers, but the plane enable
720 * flags are a subset of that.
722 * This function is used to apply the plane enable flags to the register
723 * declaration or instruction writemask.
725 * \param writemask the declaration usage mask or instruction writemask
726 * \param clip_reg_index which clip plane register is being declared/written.
727 * The legal values are 0 and 1 (two clip planes per
728 * register, for a total of 8 clip planes)
731 apply_clip_plane_mask(struct svga_shader_emitter_v10
*emit
,
732 unsigned writemask
, unsigned clip_reg_index
)
736 assert(clip_reg_index
< 2);
738 /* four clip planes per clip register: */
739 shift
= clip_reg_index
* 4;
740 writemask
&= ((emit
->key
.clip_plane_enable
>> shift
) & 0xf);
747 * Translate gallium shader type into VGPU10 type.
749 static VGPU10_PROGRAM_TYPE
750 translate_shader_type(unsigned type
)
753 case PIPE_SHADER_VERTEX
:
754 return VGPU10_VERTEX_SHADER
;
755 case PIPE_SHADER_GEOMETRY
:
756 return VGPU10_GEOMETRY_SHADER
;
757 case PIPE_SHADER_FRAGMENT
:
758 return VGPU10_PIXEL_SHADER
;
759 case PIPE_SHADER_TESS_CTRL
:
760 return VGPU10_HULL_SHADER
;
761 case PIPE_SHADER_TESS_EVAL
:
762 return VGPU10_DOMAIN_SHADER
;
763 case PIPE_SHADER_COMPUTE
:
764 return VGPU10_COMPUTE_SHADER
;
766 assert(!"Unexpected shader type");
767 return VGPU10_VERTEX_SHADER
;
773 * Translate a TGSI_OPCODE_x into a VGPU10_OPCODE_x
774 * Note: we only need to translate the opcodes for "simple" instructions,
775 * as seen below. All other opcodes are handled/translated specially.
777 static VGPU10_OPCODE_TYPE
778 translate_opcode(enum tgsi_opcode opcode
)
781 case TGSI_OPCODE_MOV
:
782 return VGPU10_OPCODE_MOV
;
783 case TGSI_OPCODE_MUL
:
784 return VGPU10_OPCODE_MUL
;
785 case TGSI_OPCODE_ADD
:
786 return VGPU10_OPCODE_ADD
;
787 case TGSI_OPCODE_DP3
:
788 return VGPU10_OPCODE_DP3
;
789 case TGSI_OPCODE_DP4
:
790 return VGPU10_OPCODE_DP4
;
791 case TGSI_OPCODE_MIN
:
792 return VGPU10_OPCODE_MIN
;
793 case TGSI_OPCODE_MAX
:
794 return VGPU10_OPCODE_MAX
;
795 case TGSI_OPCODE_MAD
:
796 return VGPU10_OPCODE_MAD
;
797 case TGSI_OPCODE_SQRT
:
798 return VGPU10_OPCODE_SQRT
;
799 case TGSI_OPCODE_FRC
:
800 return VGPU10_OPCODE_FRC
;
801 case TGSI_OPCODE_FLR
:
802 return VGPU10_OPCODE_ROUND_NI
;
803 case TGSI_OPCODE_FSEQ
:
804 return VGPU10_OPCODE_EQ
;
805 case TGSI_OPCODE_FSGE
:
806 return VGPU10_OPCODE_GE
;
807 case TGSI_OPCODE_FSNE
:
808 return VGPU10_OPCODE_NE
;
809 case TGSI_OPCODE_DDX
:
810 return VGPU10_OPCODE_DERIV_RTX
;
811 case TGSI_OPCODE_DDY
:
812 return VGPU10_OPCODE_DERIV_RTY
;
813 case TGSI_OPCODE_RET
:
814 return VGPU10_OPCODE_RET
;
815 case TGSI_OPCODE_DIV
:
816 return VGPU10_OPCODE_DIV
;
817 case TGSI_OPCODE_IDIV
:
818 return VGPU10_OPCODE_VMWARE
;
819 case TGSI_OPCODE_DP2
:
820 return VGPU10_OPCODE_DP2
;
821 case TGSI_OPCODE_BRK
:
822 return VGPU10_OPCODE_BREAK
;
824 return VGPU10_OPCODE_IF
;
825 case TGSI_OPCODE_ELSE
:
826 return VGPU10_OPCODE_ELSE
;
827 case TGSI_OPCODE_ENDIF
:
828 return VGPU10_OPCODE_ENDIF
;
829 case TGSI_OPCODE_CEIL
:
830 return VGPU10_OPCODE_ROUND_PI
;
831 case TGSI_OPCODE_I2F
:
832 return VGPU10_OPCODE_ITOF
;
833 case TGSI_OPCODE_NOT
:
834 return VGPU10_OPCODE_NOT
;
835 case TGSI_OPCODE_TRUNC
:
836 return VGPU10_OPCODE_ROUND_Z
;
837 case TGSI_OPCODE_SHL
:
838 return VGPU10_OPCODE_ISHL
;
839 case TGSI_OPCODE_AND
:
840 return VGPU10_OPCODE_AND
;
842 return VGPU10_OPCODE_OR
;
843 case TGSI_OPCODE_XOR
:
844 return VGPU10_OPCODE_XOR
;
845 case TGSI_OPCODE_CONT
:
846 return VGPU10_OPCODE_CONTINUE
;
847 case TGSI_OPCODE_EMIT
:
848 return VGPU10_OPCODE_EMIT
;
849 case TGSI_OPCODE_ENDPRIM
:
850 return VGPU10_OPCODE_CUT
;
851 case TGSI_OPCODE_BGNLOOP
:
852 return VGPU10_OPCODE_LOOP
;
853 case TGSI_OPCODE_ENDLOOP
:
854 return VGPU10_OPCODE_ENDLOOP
;
855 case TGSI_OPCODE_ENDSUB
:
856 return VGPU10_OPCODE_RET
;
857 case TGSI_OPCODE_NOP
:
858 return VGPU10_OPCODE_NOP
;
859 case TGSI_OPCODE_END
:
860 return VGPU10_OPCODE_RET
;
861 case TGSI_OPCODE_F2I
:
862 return VGPU10_OPCODE_FTOI
;
863 case TGSI_OPCODE_IMAX
:
864 return VGPU10_OPCODE_IMAX
;
865 case TGSI_OPCODE_IMIN
:
866 return VGPU10_OPCODE_IMIN
;
867 case TGSI_OPCODE_UDIV
:
868 case TGSI_OPCODE_UMOD
:
869 case TGSI_OPCODE_MOD
:
870 return VGPU10_OPCODE_UDIV
;
871 case TGSI_OPCODE_IMUL_HI
:
872 return VGPU10_OPCODE_IMUL
;
873 case TGSI_OPCODE_INEG
:
874 return VGPU10_OPCODE_INEG
;
875 case TGSI_OPCODE_ISHR
:
876 return VGPU10_OPCODE_ISHR
;
877 case TGSI_OPCODE_ISGE
:
878 return VGPU10_OPCODE_IGE
;
879 case TGSI_OPCODE_ISLT
:
880 return VGPU10_OPCODE_ILT
;
881 case TGSI_OPCODE_F2U
:
882 return VGPU10_OPCODE_FTOU
;
883 case TGSI_OPCODE_UADD
:
884 return VGPU10_OPCODE_IADD
;
885 case TGSI_OPCODE_U2F
:
886 return VGPU10_OPCODE_UTOF
;
887 case TGSI_OPCODE_UCMP
:
888 return VGPU10_OPCODE_MOVC
;
889 case TGSI_OPCODE_UMAD
:
890 return VGPU10_OPCODE_UMAD
;
891 case TGSI_OPCODE_UMAX
:
892 return VGPU10_OPCODE_UMAX
;
893 case TGSI_OPCODE_UMIN
:
894 return VGPU10_OPCODE_UMIN
;
895 case TGSI_OPCODE_UMUL
:
896 case TGSI_OPCODE_UMUL_HI
:
897 return VGPU10_OPCODE_UMUL
;
898 case TGSI_OPCODE_USEQ
:
899 return VGPU10_OPCODE_IEQ
;
900 case TGSI_OPCODE_USGE
:
901 return VGPU10_OPCODE_UGE
;
902 case TGSI_OPCODE_USHR
:
903 return VGPU10_OPCODE_USHR
;
904 case TGSI_OPCODE_USLT
:
905 return VGPU10_OPCODE_ULT
;
906 case TGSI_OPCODE_USNE
:
907 return VGPU10_OPCODE_INE
;
908 case TGSI_OPCODE_SWITCH
:
909 return VGPU10_OPCODE_SWITCH
;
910 case TGSI_OPCODE_CASE
:
911 return VGPU10_OPCODE_CASE
;
912 case TGSI_OPCODE_DEFAULT
:
913 return VGPU10_OPCODE_DEFAULT
;
914 case TGSI_OPCODE_ENDSWITCH
:
915 return VGPU10_OPCODE_ENDSWITCH
;
916 case TGSI_OPCODE_FSLT
:
917 return VGPU10_OPCODE_LT
;
918 case TGSI_OPCODE_ROUND
:
919 return VGPU10_OPCODE_ROUND_NE
;
920 /* Begin SM5 opcodes */
921 case TGSI_OPCODE_F2D
:
922 return VGPU10_OPCODE_FTOD
;
923 case TGSI_OPCODE_D2F
:
924 return VGPU10_OPCODE_DTOF
;
925 case TGSI_OPCODE_DMUL
:
926 return VGPU10_OPCODE_DMUL
;
927 case TGSI_OPCODE_DADD
:
928 return VGPU10_OPCODE_DADD
;
929 case TGSI_OPCODE_DMAX
:
930 return VGPU10_OPCODE_DMAX
;
931 case TGSI_OPCODE_DMIN
:
932 return VGPU10_OPCODE_DMIN
;
933 case TGSI_OPCODE_DSEQ
:
934 return VGPU10_OPCODE_DEQ
;
935 case TGSI_OPCODE_DSGE
:
936 return VGPU10_OPCODE_DGE
;
937 case TGSI_OPCODE_DSLT
:
938 return VGPU10_OPCODE_DLT
;
939 case TGSI_OPCODE_DSNE
:
940 return VGPU10_OPCODE_DNE
;
941 case TGSI_OPCODE_IBFE
:
942 return VGPU10_OPCODE_IBFE
;
943 case TGSI_OPCODE_UBFE
:
944 return VGPU10_OPCODE_UBFE
;
945 case TGSI_OPCODE_BFI
:
946 return VGPU10_OPCODE_BFI
;
947 case TGSI_OPCODE_BREV
:
948 return VGPU10_OPCODE_BFREV
;
949 case TGSI_OPCODE_POPC
:
950 return VGPU10_OPCODE_COUNTBITS
;
951 case TGSI_OPCODE_LSB
:
952 return VGPU10_OPCODE_FIRSTBIT_LO
;
953 case TGSI_OPCODE_IMSB
:
954 return VGPU10_OPCODE_FIRSTBIT_SHI
;
955 case TGSI_OPCODE_UMSB
:
956 return VGPU10_OPCODE_FIRSTBIT_HI
;
957 case TGSI_OPCODE_INTERP_CENTROID
:
958 return VGPU10_OPCODE_EVAL_CENTROID
;
959 case TGSI_OPCODE_INTERP_SAMPLE
:
960 return VGPU10_OPCODE_EVAL_SAMPLE_INDEX
;
961 case TGSI_OPCODE_BARRIER
:
962 return VGPU10_OPCODE_SYNC
;
965 case TGSI_OPCODE_DDIV
:
966 return VGPU10_OPCODE_DDIV
;
967 case TGSI_OPCODE_DRCP
:
968 return VGPU10_OPCODE_DRCP
;
969 case TGSI_OPCODE_D2I
:
970 return VGPU10_OPCODE_DTOI
;
971 case TGSI_OPCODE_D2U
:
972 return VGPU10_OPCODE_DTOU
;
973 case TGSI_OPCODE_I2D
:
974 return VGPU10_OPCODE_ITOD
;
975 case TGSI_OPCODE_U2D
:
976 return VGPU10_OPCODE_UTOD
;
978 case TGSI_OPCODE_SAMPLE_POS
:
979 /* Note: we never actually get this opcode because there's no GLSL
980 * function to query multisample resource sample positions. There's
981 * only the TGSI_SEMANTIC_SAMPLEPOS system value which contains the
982 * position of the current sample in the render target.
985 case TGSI_OPCODE_SAMPLE_INFO
:
986 /* NOTE: we never actually get this opcode because the GLSL compiler
987 * implements the gl_NumSamples variable with a simple constant in the
992 assert(!"Unexpected TGSI opcode in translate_opcode()");
993 return VGPU10_OPCODE_NOP
;
999 * Translate a TGSI register file type into a VGPU10 operand type.
1000 * \param array is the TGSI_FILE_TEMPORARY register an array?
1002 static VGPU10_OPERAND_TYPE
1003 translate_register_file(enum tgsi_file_type file
, boolean array
)
1006 case TGSI_FILE_CONSTANT
:
1007 return VGPU10_OPERAND_TYPE_CONSTANT_BUFFER
;
1008 case TGSI_FILE_INPUT
:
1009 return VGPU10_OPERAND_TYPE_INPUT
;
1010 case TGSI_FILE_OUTPUT
:
1011 return VGPU10_OPERAND_TYPE_OUTPUT
;
1012 case TGSI_FILE_TEMPORARY
:
1013 return array
? VGPU10_OPERAND_TYPE_INDEXABLE_TEMP
1014 : VGPU10_OPERAND_TYPE_TEMP
;
1015 case TGSI_FILE_IMMEDIATE
:
1016 /* all immediates are 32-bit values at this time so
1017 * VGPU10_OPERAND_TYPE_IMMEDIATE64 is not possible at this time.
1019 return VGPU10_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER
;
1020 case TGSI_FILE_SAMPLER
:
1021 return VGPU10_OPERAND_TYPE_SAMPLER
;
1022 case TGSI_FILE_SYSTEM_VALUE
:
1023 return VGPU10_OPERAND_TYPE_INPUT
;
1025 /* XXX TODO more cases to finish */
1028 assert(!"Bad tgsi register file!");
1029 return VGPU10_OPERAND_TYPE_NULL
;
1035 * Emit a null dst register
1038 emit_null_dst_register(struct svga_shader_emitter_v10
*emit
)
1040 VGPU10OperandToken0 operand
;
1043 operand
.operandType
= VGPU10_OPERAND_TYPE_NULL
;
1044 operand
.numComponents
= VGPU10_OPERAND_0_COMPONENT
;
1046 emit_dword(emit
, operand
.value
);
1051 * If the given register is a temporary, return the array ID.
1055 get_temp_array_id(const struct svga_shader_emitter_v10
*emit
,
1056 enum tgsi_file_type file
, unsigned index
)
1058 if (file
== TGSI_FILE_TEMPORARY
) {
1059 return emit
->temp_map
[index
].arrayId
;
1068 * If the given register is a temporary, convert the index from a TGSI
1069 * TEMPORARY index to a VGPU10 temp index.
1072 remap_temp_index(const struct svga_shader_emitter_v10
*emit
,
1073 enum tgsi_file_type file
, unsigned index
)
1075 if (file
== TGSI_FILE_TEMPORARY
) {
1076 return emit
->temp_map
[index
].index
;
1085 * Setup the operand0 fields related to indexing (1D, 2D, relative, etc).
1086 * Note: the operandType field must already be initialized.
1087 * \param file the register file being accessed
1088 * \param indirect using indirect addressing of the register file?
1089 * \param index2D if true, 2-D indexing is being used (const or temp registers)
1090 * \param indirect2D if true, 2-D indirect indexing being used (for const buf)
1092 static VGPU10OperandToken0
1093 setup_operand0_indexing(struct svga_shader_emitter_v10
*emit
,
1094 VGPU10OperandToken0 operand0
,
1095 enum tgsi_file_type file
,
1097 boolean index2D
, bool indirect2D
)
1099 VGPU10_OPERAND_INDEX_REPRESENTATION index0Rep
, index1Rep
;
1100 VGPU10_OPERAND_INDEX_DIMENSION indexDim
;
1103 * Compute index dimensions
1105 if (operand0
.operandType
== VGPU10_OPERAND_TYPE_IMMEDIATE32
||
1106 operand0
.operandType
== VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID
||
1107 operand0
.operandType
== VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID
||
1108 operand0
.operandType
== VGPU10_OPERAND_TYPE_INPUT_THREAD_ID
||
1109 operand0
.operandType
== VGPU10_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP
||
1110 operand0
.operandType
== VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID
) {
1111 /* there's no swizzle for in-line immediates */
1112 indexDim
= VGPU10_OPERAND_INDEX_0D
;
1113 assert(operand0
.selectionMode
== 0);
1115 else if (operand0
.operandType
== VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT
) {
1116 indexDim
= VGPU10_OPERAND_INDEX_0D
;
1119 indexDim
= index2D
? VGPU10_OPERAND_INDEX_2D
: VGPU10_OPERAND_INDEX_1D
;
1123 * Compute index representation(s) (immediate vs relative).
1125 if (indexDim
== VGPU10_OPERAND_INDEX_2D
) {
1126 index0Rep
= indirect2D
? VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE
1127 : VGPU10_OPERAND_INDEX_IMMEDIATE32
;
1129 index1Rep
= indirect
? VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE
1130 : VGPU10_OPERAND_INDEX_IMMEDIATE32
;
1132 else if (indexDim
== VGPU10_OPERAND_INDEX_1D
) {
1133 index0Rep
= indirect
? VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE
1134 : VGPU10_OPERAND_INDEX_IMMEDIATE32
;
1143 operand0
.indexDimension
= indexDim
;
1144 operand0
.index0Representation
= index0Rep
;
1145 operand0
.index1Representation
= index1Rep
;
1152 * Emit the operand for expressing an address register for indirect indexing.
1153 * Note that the address register is really just a temp register.
1154 * \param addr_reg_index which address register to use
1157 emit_indirect_register(struct svga_shader_emitter_v10
*emit
,
1158 unsigned addr_reg_index
)
1160 unsigned tmp_reg_index
;
1161 VGPU10OperandToken0 operand0
;
1163 assert(addr_reg_index
< MAX_VGPU10_ADDR_REGS
);
1165 tmp_reg_index
= emit
->address_reg_index
[addr_reg_index
];
1167 /* operand0 is a simple temporary register, selecting one component */
1169 operand0
.operandType
= VGPU10_OPERAND_TYPE_TEMP
;
1170 operand0
.numComponents
= VGPU10_OPERAND_4_COMPONENT
;
1171 operand0
.indexDimension
= VGPU10_OPERAND_INDEX_1D
;
1172 operand0
.index0Representation
= VGPU10_OPERAND_INDEX_IMMEDIATE32
;
1173 operand0
.selectionMode
= VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE
;
1174 operand0
.swizzleX
= 0;
1175 operand0
.swizzleY
= 1;
1176 operand0
.swizzleZ
= 2;
1177 operand0
.swizzleW
= 3;
1179 emit_dword(emit
, operand0
.value
);
1180 emit_dword(emit
, remap_temp_index(emit
, TGSI_FILE_TEMPORARY
, tmp_reg_index
));
/**
 * Translate the dst register of a TGSI instruction and emit VGPU10 tokens.
 *
 * For TGSI_FILE_OUTPUT registers the destination is frequently replaced
 * by a temporary register, depending on the shader stage and the output's
 * semantic (position, clip distance, clip vertex, viewport index, fragment
 * color, tessellation factors, patch constants, control points).
 * Besides emitting tokens this function may also:
 *  - mark a written temporary as initialized in emit->temp_map[],
 *  - set the saturate bit of the opcode token of the current instruction,
 *  - set emit->discard_instruction or toggle emit->reemit_instruction
 *    (tessellation control shaders),
 *  - bump emit->num_output_writes (fragment color outputs).
 *
 * \param emit  the emitter context
 * \param reg  the TGSI dst register to translate
 */
static void
emit_dst_register(struct svga_shader_emitter_v10 *emit,
                  const struct tgsi_full_dst_register *reg)
{
   enum tgsi_file_type file = reg->Register.File;
   unsigned index = reg->Register.Index;
   const enum tgsi_semantic sem_name = emit->info.output_semantic_name[index];
   const unsigned sem_index = emit->info.output_semantic_index[index];
   unsigned writemask = reg->Register.WriteMask;
   const boolean indirect = reg->Register.Indirect;
   unsigned tempArrayId = get_temp_array_id(emit, file, index);
   boolean index2d = reg->Register.Dimension || tempArrayId > 0;
   VGPU10OperandToken0 operand0;

   if (file == TGSI_FILE_TEMPORARY) {
      /* record that this temporary has been written */
      emit->temp_map[index].initialized = TRUE;
   }

   if (file == TGSI_FILE_OUTPUT) {
      if (emit->unit == PIPE_SHADER_VERTEX ||
          emit->unit == PIPE_SHADER_GEOMETRY ||
          emit->unit == PIPE_SHADER_TESS_EVAL) {
         if (index == emit->vposition.out_index &&
             emit->vposition.tmp_index != INVALID_INDEX) {
            /* replace OUTPUT[POS] with TEMP[POS].  We need to store the
             * vertex position result in a temporary so that we can modify
             * it in the post_helper() code.
             */
            file = TGSI_FILE_TEMPORARY;
            index = emit->vposition.tmp_index;
         }
         else if (sem_name == TGSI_SEMANTIC_CLIPDIST &&
                  emit->clip_dist_tmp_index != INVALID_INDEX) {
            /* replace OUTPUT[CLIPDIST] with TEMP[CLIPDIST].
             * We store the clip distance in a temporary first, then
             * we'll copy it to the shadow copy and to CLIPDIST with the
             * enabled planes mask in emit_clip_distance_instructions().
             */
            file = TGSI_FILE_TEMPORARY;
            index = emit->clip_dist_tmp_index + sem_index;
         }
         else if (sem_name == TGSI_SEMANTIC_CLIPVERTEX &&
                  emit->clip_vertex_tmp_index != INVALID_INDEX) {
            /* replace the CLIPVERTEX output register with a temporary */
            assert(emit->clip_mode == CLIP_VERTEX);
            assert(sem_index == 0);
            file = TGSI_FILE_TEMPORARY;
            index = emit->clip_vertex_tmp_index;
         }
         else if (sem_name == TGSI_SEMANTIC_COLOR &&
                  emit->key.clamp_vertex_color) {

            /* set the saturate modifier of the instruction
             * to clamp the vertex color.  The opcode token of the
             * instruction being emitted sits at inst_start_token in
             * the output buffer; it is patched in place.
             */
            VGPU10OpcodeToken0 *token =
               (VGPU10OpcodeToken0 *)emit->buf + emit->inst_start_token;
            token->saturate = TRUE;
         }
         else if (sem_name == TGSI_SEMANTIC_VIEWPORT_INDEX &&
                  emit->gs.viewport_index_out_index != INVALID_INDEX) {
            /* replace OUTPUT[VIEWPORT_INDEX] with its temporary */
            file = TGSI_FILE_TEMPORARY;
            index = emit->gs.viewport_index_tmp_index;
         }
      }
      else if (emit->unit == PIPE_SHADER_FRAGMENT) {
         if (sem_name == TGSI_SEMANTIC_POSITION) {
            /* Fragment depth output register.  This is a single-token,
             * index-less operand, so we emit it and are done.
             */
            operand0.value = 0;
            operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_DEPTH;
            operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
            operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
            emit_dword(emit, operand0.value);
            return;
         }
         else if (sem_name == TGSI_SEMANTIC_SAMPLEMASK) {
            /* Fragment sample mask output.  Also a single-token,
             * index-less operand.
             */
            operand0.value = 0;
            operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK;
            operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
            operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
            emit_dword(emit, operand0.value);
            return;
         }
         else if (index == emit->fs.color_out_index[0] &&
                 emit->fs.color_tmp_index != INVALID_INDEX) {
            /* replace OUTPUT[COLOR] with TEMP[COLOR].  We need to store the
             * fragment color result in a temporary so that we can read
             * it in the post_helper() code.
             */
            file = TGSI_FILE_TEMPORARY;
            index = emit->fs.color_tmp_index;
         }
         else {
            /* Typically, for fragment shaders, the output register index
             * matches the color semantic index.  But not when we write to
             * the fragment depth register.  In that case, OUT[0] will be
             * fragdepth and OUT[1] will be the 0th color output.  We need
             * to use the semantic index for color outputs.
             */
            assert(sem_name == TGSI_SEMANTIC_COLOR);
            index = emit->info.output_semantic_index[index];

            emit->num_output_writes++;
         }
      }
      else if (emit->unit == PIPE_SHADER_TESS_CTRL) {
         if (index == emit->tcs.inner.tgsi_index) {
            /* replace OUTPUT[TESSLEVEL] with temp.  We store it in a
             * temporary for now so that it can be stored into the
             * appropriate registers in post_helper() in the patch
             * constant phase.
             */
            if (emit->tcs.control_point_phase) {
               /* Discard writing into tessfactor in control point phase */
               emit->discard_instruction = TRUE;
            }
            else {
               file = TGSI_FILE_TEMPORARY;
               index = emit->tcs.inner.temp_index;
            }
         }
         else if (index == emit->tcs.outer.tgsi_index) {
            /* replace OUTPUT[TESSLEVEL] with temp.  We store it in a
             * temporary for now so that it can be stored into the
             * appropriate registers in post_helper().
             */
            if (emit->tcs.control_point_phase) {
               /* Discard writing into tessfactor in control point phase */
               emit->discard_instruction = TRUE;
            }
            else {
               file = TGSI_FILE_TEMPORARY;
               index = emit->tcs.outer.temp_index;
            }
         }
         else if (index >= emit->tcs.patch_generic_out_index &&
                  index < (emit->tcs.patch_generic_out_index +
                           emit->tcs.patch_generic_out_count)) {
            if (emit->tcs.control_point_phase) {
               /* Discard writing into generic patch constant outputs in
                  control point phase */
               emit->discard_instruction = TRUE;
            }
            else {
               if (emit->reemit_instruction) {
                  /* Store results of reemitted instruction in temporary register. */
                  file = TGSI_FILE_TEMPORARY;
                  index = emit->tcs.patch_generic_tmp_index +
                          (index - emit->tcs.patch_generic_out_index);
                  /**
                   * Temporaries for patch constant data can be done
                   * as indexable temporaries.
                   */
                  tempArrayId = get_temp_array_id(emit, file, index);
                  index2d = tempArrayId > 0;

                  emit->reemit_instruction = FALSE;
               }
               else {
                  /* If per-patch outputs are read in the shader, we
                   * reemit the instruction and store the results in
                   * temporaries in the patch constant phase. */
                  if (emit->info.reads_perpatch_outputs) {
                     emit->reemit_instruction = TRUE;
                  }
               }
            }
         }
         else if (reg->Register.Dimension) {
            /* Only control point outputs are declared 2D in tgsi */
            if (emit->tcs.control_point_phase) {
               if (emit->reemit_instruction) {
                  /* Store results of reemitted instruction in temporary register. */
                  index2d = FALSE;
                  file = TGSI_FILE_TEMPORARY;
                  index = emit->tcs.control_point_tmp_index +
                          (index - emit->tcs.control_point_out_index);
                  emit->reemit_instruction = FALSE;
               }
               else {
                  /* The mapped control point outputs are 1-D */
                  index2d = FALSE;
                  if (emit->info.reads_pervertex_outputs) {
                     /* If per-vertex outputs are read in the shader, we
                      * reemit the instruction and store the results in
                      * temporaries in the control point phase. */
                     emit->reemit_instruction = TRUE;
                  }
               }

               if (sem_name == TGSI_SEMANTIC_CLIPDIST &&
                   emit->clip_dist_tmp_index != INVALID_INDEX) {
                  /* replace OUTPUT[CLIPDIST] with TEMP[CLIPDIST].
                   * We store the clip distance in a temporary first, then
                   * we'll copy it to the shadow copy and to CLIPDIST with the
                   * enabled planes mask in emit_clip_distance_instructions().
                   */
                  file = TGSI_FILE_TEMPORARY;
                  index = emit->clip_dist_tmp_index + sem_index;
               }
               else if (sem_name == TGSI_SEMANTIC_CLIPVERTEX &&
                        emit->clip_vertex_tmp_index != INVALID_INDEX) {
                  /* replace the CLIPVERTEX output register with a temporary */
                  assert(emit->clip_mode == CLIP_VERTEX);
                  assert(sem_index == 0);
                  file = TGSI_FILE_TEMPORARY;
                  index = emit->clip_vertex_tmp_index;
               }
            }
            else {
               /* Discard writing into control point outputs in
                  patch constant phase */
               emit->discard_instruction = TRUE;
            }
         }
      }
   }

   /* init operand tokens to all zero */
   operand0.value = 0;

   operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;

   /* the operand has a writemask */
   operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;

   /* Which of the four dest components to write to. Note that we can use a
    * simple assignment here since TGSI writemasks match VGPU10 writemasks.
    */
   STATIC_ASSERT(TGSI_WRITEMASK_X == VGPU10_OPERAND_4_COMPONENT_MASK_X);
   operand0.mask = writemask;

   /* translate TGSI register file type to VGPU10 operand type */
   operand0.operandType = translate_register_file(file, tempArrayId > 0);

   check_register_index(emit, operand0.operandType, index);

   operand0 = setup_operand0_indexing(emit, operand0, file, indirect,
                                      index2d, FALSE);

   /* Emit tokens: operand token, optional temp array ID, register index
    * and, for indirect addressing, the address register operand.
    */
   emit_dword(emit, operand0.value);
   if (tempArrayId > 0) {
      emit_dword(emit, tempArrayId);
   }

   emit_dword(emit, remap_temp_index(emit, file, index));

   if (indirect) {
      emit_indirect_register(emit, reg->Indirect.Index);
   }
}
1444 * Check if temporary register needs to be initialize when
1445 * shader is not using indirect addressing for temporary and uninitialized
1446 * temporary is not used in loop. In these two scenarios, we cannot
1447 * determine if temporary is initialized or not.
1450 need_temp_reg_initialization(struct svga_shader_emitter_v10
*emit
,
1453 if (!(emit
->info
.indirect_files
&& (1u << TGSI_FILE_TEMPORARY
))
1454 && emit
->current_loop_depth
== 0) {
1455 if (!emit
->temp_map
[index
].initialized
&&
1456 emit
->temp_map
[index
].index
< emit
->num_shader_temps
) {
/**
 * Translate a src register of a TGSI instruction and emit VGPU10 tokens.
 * In quite a few cases, we do register substitution.  For example, if
 * the TGSI register is the front/back-face register, we replace that with
 * a temp register containing a value we computed earlier.
 *
 * The first half of the function is a per-shader-stage pass that may
 * replace the register file/index/swizzle, pre-set operand0 for special
 * operand types, or emit a single-token operand and return early.
 * The second half builds the generic operand token(s) and emits them
 * followed by the register index(es).
 * Side effects besides emitting tokens: may set emit->discard_instruction
 * and emit->initialize_temp_index.
 *
 * \param emit  the emitter context
 * \param reg  the TGSI src register to translate
 */
static void
emit_src_register(struct svga_shader_emitter_v10 *emit,
                  const struct tgsi_full_src_register *reg)
{
   enum tgsi_file_type file = reg->Register.File;
   unsigned index = reg->Register.Index;
   const boolean indirect = reg->Register.Indirect;
   unsigned tempArrayId = get_temp_array_id(emit, file, index);
   boolean index2d = (reg->Register.Dimension ||
                      tempArrayId > 0 ||
                      file == TGSI_FILE_CONSTANT);
   /* first index of a 2-D operand: temp array ID or the TGSI dimension */
   unsigned index2 = tempArrayId > 0 ? tempArrayId : reg->Dimension.Index;
   boolean indirect2d = reg->Dimension.Indirect;
   unsigned swizzleX = reg->Register.SwizzleX;
   unsigned swizzleY = reg->Register.SwizzleY;
   unsigned swizzleZ = reg->Register.SwizzleZ;
   unsigned swizzleW = reg->Register.SwizzleW;
   const boolean absolute = reg->Register.Absolute;
   const boolean negate = reg->Register.Negate;
   VGPU10OperandToken0 operand0;
   VGPU10OperandToken1 operand1;

   /* operand0.value == 0 later means "no special case was set up" */
   operand0.value = operand1.value = 0;

   if (emit->unit == PIPE_SHADER_FRAGMENT){
      if (file == TGSI_FILE_INPUT) {
         if (index == emit->fs.face_input_index) {
            /* Replace INPUT[FACE] with TEMP[FACE] */
            file = TGSI_FILE_TEMPORARY;
            index = emit->fs.face_tmp_index;
         }
         else if (index == emit->fs.fragcoord_input_index) {
            /* Replace INPUT[POSITION] with TEMP[POSITION] */
            file = TGSI_FILE_TEMPORARY;
            index = emit->fs.fragcoord_tmp_index;
         }
         else if (index == emit->fs.layer_input_index) {
            /* Replace INPUT[LAYER] with zero.x */
            file = TGSI_FILE_IMMEDIATE;
            index = emit->fs.layer_imm_index;
            swizzleX = swizzleY = swizzleZ = swizzleW = TGSI_SWIZZLE_X;
         }
         else {
            /* We remap fragment shader inputs so that FS input indexes
             * match up with VS/GS output indexes.
             */
            index = emit->linkage.input_map[index];
         }
      }
      else if (file == TGSI_FILE_SYSTEM_VALUE) {
         if (index == emit->fs.sample_pos_sys_index) {
            assert(emit->version >= 41);
            /* Current sample position is in a temp register */
            file = TGSI_FILE_TEMPORARY;
            index = emit->fs.sample_pos_tmp_index;
         }
         else if (index == emit->fs.sample_mask_in_sys_index) {
            /* Emitted as vCoverage0.x */
            /* According to GLSL spec, the gl_SampleMaskIn array has ceil(s / 32)
             * elements where s is the maximum number of color samples supported
             * by the implementation. With current implementation, we should not
             * have more than one element. So assert if Index != 0
             */
            assert((!reg->Register.Indirect && reg->Register.Index == 0) ||
                   reg->Register.Indirect);

            /* single-token, index-less operand: emit it and we are done */
            operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_COVERAGE_MASK;
            operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
            operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
            operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
            emit_dword(emit, operand0.value);
            return;
         }
         else {
            /* Map the TGSI system value to a VGPU10 input register */
            assert(index < ARRAY_SIZE(emit->system_value_indexes));
            file = TGSI_FILE_INPUT;
            index = emit->system_value_indexes[index];
         }
      }
   }
   else if (emit->unit == PIPE_SHADER_GEOMETRY) {
      if (file == TGSI_FILE_INPUT) {
         if (index == emit->gs.prim_id_index) {
            /* primitive ID has its own, component-less operand type */
            operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
            operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
         }
         index = emit->linkage.input_map[index];
      }
      else if (file == TGSI_FILE_SYSTEM_VALUE &&
               index == emit->gs.invocation_id_sys_index) {
         /* Emitted as vGSInstanceID0.x */
         operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
         operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID;
         index = 0;
      }
   }
   else if (emit->unit == PIPE_SHADER_VERTEX) {
      if (file == TGSI_FILE_INPUT) {
         /* if input is adjusted... (any of the per-attribute adjustment
          * bitmasks in the shader key has this input's bit set), read
          * the adjusted value from its temporary instead.
          */
         if ((emit->key.vs.adjust_attrib_w_1 |
              emit->key.vs.adjust_attrib_itof |
              emit->key.vs.adjust_attrib_utof |
              emit->key.vs.attrib_is_bgra |
              emit->key.vs.attrib_puint_to_snorm |
              emit->key.vs.attrib_puint_to_uscaled |
              emit->key.vs.attrib_puint_to_sscaled) & (1 << index)) {
            file = TGSI_FILE_TEMPORARY;
            index = emit->vs.adjusted_input[index];
         }
      }
      else if (file == TGSI_FILE_SYSTEM_VALUE) {
         if (index == emit->vs.vertex_id_sys_index &&
             emit->vs.vertex_id_tmp_index != INVALID_INDEX) {
            /* the vertex ID is read from a temporary's X component */
            file = TGSI_FILE_TEMPORARY;
            index = emit->vs.vertex_id_tmp_index;
            swizzleX = swizzleY = swizzleZ = swizzleW = TGSI_SWIZZLE_X;
         }
         else {
            /* Map the TGSI system value to a VGPU10 input register */
            assert(index < ARRAY_SIZE(emit->system_value_indexes));
            file = TGSI_FILE_INPUT;
            index = emit->system_value_indexes[index];
         }
      }
   }
   else if (emit->unit == PIPE_SHADER_TESS_CTRL) {

      if (file == TGSI_FILE_SYSTEM_VALUE) {
         if (index == emit->tcs.vertices_per_patch_index) {
            /**
             * if source register is the system value for vertices_per_patch,
             * replace it with the immediate.
             */
            file = TGSI_FILE_IMMEDIATE;
            index = emit->tcs.imm_index;
            swizzleX = swizzleY = swizzleZ = swizzleW = TGSI_SWIZZLE_X;
         }
         else if (index == emit->tcs.invocation_id_sys_index) {
            if (emit->tcs.control_point_phase) {
               /**
                * Emitted as vOutputControlPointID.x
                * (single-token operand, so emit it and return)
                */
               operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
               operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID;
               operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
               operand0.mask = 0;
               emit_dword(emit, operand0.value);
               return;
            }
            else {
               /* There is no control point ID input declaration in
                * the patch constant phase in hull shader.
                * Since for now we are emitting all instructions in
                * the patch constant phase, we are replacing the
                * control point ID reference with the immediate 0.
                */
               file = TGSI_FILE_IMMEDIATE;
               index = emit->tcs.imm_index;
               swizzleX = swizzleY = swizzleZ = swizzleW = TGSI_SWIZZLE_W;
            }
         }
         else if (index == emit->tcs.prim_id_index) {
            /**
             * Emitted as vPrim.x
             */
            operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
            operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
            index = 0;
         }
      }
      else if (file == TGSI_FILE_INPUT) {
         index = emit->linkage.input_map[index];
         if (!emit->tcs.control_point_phase) {
            /* Emitted as vicp */
            operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
            operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT;
            assert(reg->Register.Dimension);
         }
      }
      else if (file == TGSI_FILE_OUTPUT) {
         if ((index >= emit->tcs.patch_generic_out_index &&
              index < (emit->tcs.patch_generic_out_index +
                       emit->tcs.patch_generic_out_count)) ||
             index == emit->tcs.inner.tgsi_index ||
             index == emit->tcs.outer.tgsi_index) {
            /* reading a per-patch output (generic or tess factor) */
            if (emit->tcs.control_point_phase) {
               emit->discard_instruction = TRUE;
            }
            else {
               /* Device doesn't allow reading from output so
                * use corresponding temporary register as source */
               file = TGSI_FILE_TEMPORARY;
               if (index == emit->tcs.inner.tgsi_index) {
                  index = emit->tcs.inner.temp_index;
               }
               else if (index == emit->tcs.outer.tgsi_index) {
                  index = emit->tcs.outer.temp_index;
               }
               else {
                  index = emit->tcs.patch_generic_tmp_index +
                          (index - emit->tcs.patch_generic_out_index);
               }

               /**
                * Temporaries for patch constant data can be done
                * as indexable temporaries.
                */
               tempArrayId = get_temp_array_id(emit, file, index);
               index2d = tempArrayId > 0;
               index2 = tempArrayId > 0 ? tempArrayId : reg->Dimension.Index;
            }
         }
         else {
            /* reading any other output (handled as a control point output) */
            if (emit->tcs.control_point_phase) {
               /* Device doesn't allow reading from output so
                * use corresponding temporary register as source */
               file = TGSI_FILE_TEMPORARY;
               index2d = FALSE;
               index = emit->tcs.control_point_tmp_index +
                       (index - emit->tcs.control_point_out_index);
            }
            else {
               emit->discard_instruction = TRUE;
            }
         }
      }
   }
   else if (emit->unit == PIPE_SHADER_TESS_EVAL) {
      if (file == TGSI_FILE_SYSTEM_VALUE) {
         if (index == emit->tes.tesscoord_sys_index) {
            /**
             * Emitted as vDomain
             */
            operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
            operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT;
            index = 0;
         }
         else if (index == emit->tes.inner.tgsi_index) {
            /* inner tess factor is read from its temporary */
            file = TGSI_FILE_TEMPORARY;
            index = emit->tes.inner.temp_index;
         }
         else if (index == emit->tes.outer.tgsi_index) {
            /* outer tess factor is read from its temporary */
            file = TGSI_FILE_TEMPORARY;
            index = emit->tes.outer.temp_index;
         }
         else if (index == emit->tes.prim_id_index) {
            /**
             * Emitted as vPrim.x
             */
            operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
            operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
            index = 0;
         }

      }
      else if (file == TGSI_FILE_INPUT) {
         if (index2d) {
            /* 2D input is emitted as vcp (input control point). */
            operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT;
            operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;

            /* index specifies the element index and is remapped
             * to align with the tcs output index.
             */
            index = emit->linkage.input_map[index];

            assert(index2 < emit->key.tes.vertices_per_patch);
         }
         else {
            if (index < emit->key.tes.tessfactor_index)
               /* index specifies the generic patch index.
                * Remapped to match up with the tcs output index.
                */
               index = emit->linkage.input_map[index];

            operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT;
            operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
         }
      }
   }

   if (file == TGSI_FILE_ADDRESS) {
      /* address registers are implemented with temporaries */
      index = emit->address_reg_index[index];
      file = TGSI_FILE_TEMPORARY;
   }

   if (file == TGSI_FILE_TEMPORARY) {
      if (need_temp_reg_initialization(emit, index)) {
         /* remember which temporary needs initializing and drop the
          * instruction currently being emitted
          */
         emit->initialize_temp_index = index;
         emit->discard_instruction = TRUE;
      }
   }

   if (operand0.value == 0) {
      /* if operand0 was not set above for a special case, do the general
       * case now.
       */
      operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
      operand0.operandType = translate_register_file(file, tempArrayId > 0);
   }
   operand0 = setup_operand0_indexing(emit, operand0, file, indirect,
                                      index2d, indirect2d);

   if (operand0.operandType != VGPU10_OPERAND_TYPE_IMMEDIATE32 &&
       operand0.operandType != VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID) {
      /* there's no swizzle for in-line immediates */
      if (swizzleX == swizzleY &&
          swizzleX == swizzleZ &&
          swizzleX == swizzleW) {
         /* all four swizzle terms agree: select that one component */
         operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
      }
      else {
         operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
      }

      operand0.swizzleX = swizzleX;
      operand0.swizzleY = swizzleY;
      operand0.swizzleZ = swizzleZ;
      operand0.swizzleW = swizzleW;

      if (absolute || negate) {
         /* abs/neg modifiers need the extended operand token */
         operand0.extended = 1;
         operand1.extendedOperandType = VGPU10_EXTENDED_OPERAND_MODIFIER;
         if (absolute && !negate)
            operand1.operandModifier = VGPU10_OPERAND_MODIFIER_ABS;
         if (!absolute && negate)
            operand1.operandModifier = VGPU10_OPERAND_MODIFIER_NEG;
         if (absolute && negate)
            operand1.operandModifier = VGPU10_OPERAND_MODIFIER_ABSNEG;
      }
   }

   /* Emit the operand tokens */
   emit_dword(emit, operand0.value);
   if (operand0.extended)
      emit_dword(emit, operand1.value);

   if (operand0.operandType == VGPU10_OPERAND_TYPE_IMMEDIATE32) {
      /* Emit the four float/int in-line immediate values */
      unsigned *c;
      assert(index < ARRAY_SIZE(emit->immediates));
      assert(file == TGSI_FILE_IMMEDIATE);
      assert(swizzleX < 4);
      assert(swizzleY < 4);
      assert(swizzleZ < 4);
      assert(swizzleW < 4);
      c = (unsigned *) emit->immediates[index];
      emit_dword(emit, c[swizzleX]);
      emit_dword(emit, c[swizzleY]);
      emit_dword(emit, c[swizzleZ]);
      emit_dword(emit, c[swizzleW]);
   }
   else if (operand0.indexDimension >= VGPU10_OPERAND_INDEX_1D) {
      /* Emit the register index(es) */
      if (index2d) {
         emit_dword(emit, index2);

         if (indirect2d) {
            emit_indirect_register(emit, reg->DimIndirect.Index);
         }
      }

      emit_dword(emit, remap_temp_index(emit, file, index));

      if (indirect) {
         emit_indirect_register(emit, reg->Indirect.Index);
      }
   }
}
1844 * Emit a resource operand (for use with a SAMPLE instruction).
1847 emit_resource_register(struct svga_shader_emitter_v10
*emit
,
1848 unsigned resource_number
)
1850 VGPU10OperandToken0 operand0
;
1852 check_register_index(emit
, VGPU10_OPERAND_TYPE_RESOURCE
, resource_number
);
1857 operand0
.operandType
= VGPU10_OPERAND_TYPE_RESOURCE
;
1858 operand0
.indexDimension
= VGPU10_OPERAND_INDEX_1D
;
1859 operand0
.numComponents
= VGPU10_OPERAND_4_COMPONENT
;
1860 operand0
.selectionMode
= VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE
;
1861 operand0
.swizzleX
= VGPU10_COMPONENT_X
;
1862 operand0
.swizzleY
= VGPU10_COMPONENT_Y
;
1863 operand0
.swizzleZ
= VGPU10_COMPONENT_Z
;
1864 operand0
.swizzleW
= VGPU10_COMPONENT_W
;
1866 emit_dword(emit
, operand0
.value
);
1867 emit_dword(emit
, resource_number
);
1872 * Emit a sampler operand (for use with a SAMPLE instruction).
1875 emit_sampler_register(struct svga_shader_emitter_v10
*emit
,
1876 unsigned sampler_number
)
1878 VGPU10OperandToken0 operand0
;
1880 check_register_index(emit
, VGPU10_OPERAND_TYPE_SAMPLER
, sampler_number
);
1885 operand0
.operandType
= VGPU10_OPERAND_TYPE_SAMPLER
;
1886 operand0
.indexDimension
= VGPU10_OPERAND_INDEX_1D
;
1888 emit_dword(emit
, operand0
.value
);
1889 emit_dword(emit
, sampler_number
);
1894 * Emit an operand which reads the IS_FRONT_FACING register.
1897 emit_face_register(struct svga_shader_emitter_v10
*emit
)
1899 VGPU10OperandToken0 operand0
;
1900 unsigned index
= emit
->linkage
.input_map
[emit
->fs
.face_input_index
];
1905 operand0
.operandType
= VGPU10_OPERAND_TYPE_INPUT
;
1906 operand0
.indexDimension
= VGPU10_OPERAND_INDEX_1D
;
1907 operand0
.selectionMode
= VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE
;
1908 operand0
.numComponents
= VGPU10_OPERAND_4_COMPONENT
;
1910 operand0
.swizzleX
= VGPU10_COMPONENT_X
;
1911 operand0
.swizzleY
= VGPU10_COMPONENT_X
;
1912 operand0
.swizzleZ
= VGPU10_COMPONENT_X
;
1913 operand0
.swizzleW
= VGPU10_COMPONENT_X
;
1915 emit_dword(emit
, operand0
.value
);
1916 emit_dword(emit
, index
);
1921 * Emit tokens for the "rasterizer" register used by the SAMPLE_POS
1925 emit_rasterizer_register(struct svga_shader_emitter_v10
*emit
)
1927 VGPU10OperandToken0 operand0
;
1932 /* No register index for rasterizer index (there's only one) */
1933 operand0
.operandType
= VGPU10_OPERAND_TYPE_RASTERIZER
;
1934 operand0
.indexDimension
= VGPU10_OPERAND_INDEX_0D
;
1935 operand0
.numComponents
= VGPU10_OPERAND_4_COMPONENT
;
1936 operand0
.selectionMode
= VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE
;
1937 operand0
.swizzleX
= VGPU10_COMPONENT_X
;
1938 operand0
.swizzleY
= VGPU10_COMPONENT_Y
;
1939 operand0
.swizzleZ
= VGPU10_COMPONENT_Z
;
1940 operand0
.swizzleW
= VGPU10_COMPONENT_W
;
1942 emit_dword(emit
, operand0
.value
);
1947 * Emit tokens for the "stream" register used by the
1948 * DCL_STREAM, CUT_STREAM, EMIT_STREAM instructions.
1951 emit_stream_register(struct svga_shader_emitter_v10
*emit
, unsigned index
)
1953 VGPU10OperandToken0 operand0
;
1958 /* No register index for rasterizer index (there's only one) */
1959 operand0
.operandType
= VGPU10_OPERAND_TYPE_STREAM
;
1960 operand0
.indexDimension
= VGPU10_OPERAND_INDEX_1D
;
1961 operand0
.numComponents
= VGPU10_OPERAND_0_COMPONENT
;
1963 emit_dword(emit
, operand0
.value
);
1964 emit_dword(emit
, index
);
1969 * Emit the token for a VGPU10 opcode, with precise parameter.
1970 * \param saturate clamp result to [0,1]?
1973 emit_opcode_precise(struct svga_shader_emitter_v10
*emit
,
1974 unsigned vgpu10_opcode
, boolean saturate
, boolean precise
)
1976 VGPU10OpcodeToken0 token0
;
1978 token0
.value
= 0; /* init all fields to zero */
1979 token0
.opcodeType
= vgpu10_opcode
;
1980 token0
.instructionLength
= 0; /* Filled in by end_emit_instruction() */
1981 token0
.saturate
= saturate
;
1983 /* Mesa's GLSL IR -> TGSI translator will set the TGSI precise flag for
1984 * 'invariant' declarations. Only set preciseValues=1 if we have SM5.
1986 token0
.preciseValues
= precise
&& emit
->version
>= 50;
1988 emit_dword(emit
, token0
.value
);
1990 emit
->uses_precise_qualifier
|= token0
.preciseValues
;
1995 * Emit the token for a VGPU10 opcode.
1996 * \param saturate clamp result to [0,1]?
1999 emit_opcode(struct svga_shader_emitter_v10
*emit
,
2000 unsigned vgpu10_opcode
, boolean saturate
)
2002 emit_opcode_precise(emit
, vgpu10_opcode
, saturate
, FALSE
);
2007 * Emit the token for a VGPU10 resinfo instruction.
2008 * \param modifier return type modifier, _uint or _rcpFloat.
2009 * TODO: We may want to remove this parameter if it will
2010 * only ever be used as _uint.
2013 emit_opcode_resinfo(struct svga_shader_emitter_v10
*emit
,
2014 VGPU10_RESINFO_RETURN_TYPE modifier
)
2016 VGPU10OpcodeToken0 token0
;
2018 token0
.value
= 0; /* init all fields to zero */
2019 token0
.opcodeType
= VGPU10_OPCODE_RESINFO
;
2020 token0
.instructionLength
= 0; /* Filled in by end_emit_instruction() */
2021 token0
.resinfoReturnType
= modifier
;
2023 emit_dword(emit
, token0
.value
);
2028 * Emit opcode tokens for a texture sample instruction. Texture instructions
2029 * can be rather complicated (texel offsets, etc) so we have this specialized
2033 emit_sample_opcode(struct svga_shader_emitter_v10
*emit
,
2034 unsigned vgpu10_opcode
, boolean saturate
,
2035 const int offsets
[3])
2037 VGPU10OpcodeToken0 token0
;
2038 VGPU10OpcodeToken1 token1
;
2040 token0
.value
= 0; /* init all fields to zero */
2041 token0
.opcodeType
= vgpu10_opcode
;
2042 token0
.instructionLength
= 0; /* Filled in by end_emit_instruction() */
2043 token0
.saturate
= saturate
;
2045 if (offsets
[0] || offsets
[1] || offsets
[2]) {
2046 assert(offsets
[0] >= VGPU10_MIN_TEXEL_FETCH_OFFSET
);
2047 assert(offsets
[1] >= VGPU10_MIN_TEXEL_FETCH_OFFSET
);
2048 assert(offsets
[2] >= VGPU10_MIN_TEXEL_FETCH_OFFSET
);
2049 assert(offsets
[0] <= VGPU10_MAX_TEXEL_FETCH_OFFSET
);
2050 assert(offsets
[1] <= VGPU10_MAX_TEXEL_FETCH_OFFSET
);
2051 assert(offsets
[2] <= VGPU10_MAX_TEXEL_FETCH_OFFSET
);
2053 token0
.extended
= 1;
2055 token1
.opcodeType
= VGPU10_EXTENDED_OPCODE_SAMPLE_CONTROLS
;
2056 token1
.offsetU
= offsets
[0];
2057 token1
.offsetV
= offsets
[1];
2058 token1
.offsetW
= offsets
[2];
2061 emit_dword(emit
, token0
.value
);
2062 if (token0
.extended
) {
2063 emit_dword(emit
, token1
.value
);
2069 * Emit a DISCARD opcode token.
2070 * If nonzero is set, we'll discard the fragment if the X component is not 0.
2071 * Otherwise, we'll discard the fragment if the X component is 0.
2074 emit_discard_opcode(struct svga_shader_emitter_v10
*emit
, boolean nonzero
)
2076 VGPU10OpcodeToken0 opcode0
;
2079 opcode0
.opcodeType
= VGPU10_OPCODE_DISCARD
;
2081 opcode0
.testBoolean
= VGPU10_INSTRUCTION_TEST_NONZERO
;
2083 emit_dword(emit
, opcode0
.value
);
2088 * We need to call this before we begin emitting a VGPU10 instruction.
2091 begin_emit_instruction(struct svga_shader_emitter_v10
*emit
)
2093 assert(emit
->inst_start_token
== 0);
2094 /* Save location of the instruction's VGPU10OpcodeToken0 token.
2095 * Note, we can't save a pointer because it would become invalid if
2096 * we have to realloc the output buffer.
2098 emit
->inst_start_token
= emit_get_num_tokens(emit
);
2103 * We need to call this after we emit the last token of a VGPU10 instruction.
2104 * This function patches in the opcode token's instructionLength field.
2107 end_emit_instruction(struct svga_shader_emitter_v10
*emit
)
2109 VGPU10OpcodeToken0
*tokens
= (VGPU10OpcodeToken0
*) emit
->buf
;
2110 unsigned inst_length
;
2112 assert(emit
->inst_start_token
> 0);
2114 if (emit
->discard_instruction
) {
2115 /* Back up the emit->ptr to where this instruction started so
2116 * that we discard the current instruction.
2118 emit
->ptr
= (char *) (tokens
+ emit
->inst_start_token
);
2121 /* Compute instruction length and patch that into the start of
2124 inst_length
= emit_get_num_tokens(emit
) - emit
->inst_start_token
;
2126 assert(inst_length
> 0);
2128 tokens
[emit
->inst_start_token
].instructionLength
= inst_length
;
2131 emit
->inst_start_token
= 0; /* reset to zero for error checking */
2132 emit
->discard_instruction
= FALSE
;
2137 * Return index for a free temporary register.
2140 get_temp_index(struct svga_shader_emitter_v10
*emit
)
2142 assert(emit
->internal_temp_count
< MAX_INTERNAL_TEMPS
);
2143 return emit
->num_shader_temps
+ emit
->internal_temp_count
++;
2148 * Release the temporaries which were generated by get_temp_index().
2151 free_temp_indexes(struct svga_shader_emitter_v10
*emit
)
2153 emit
->internal_temp_count
= 0;
2158 * Create a tgsi_full_src_register.
2160 static struct tgsi_full_src_register
2161 make_src_reg(enum tgsi_file_type file
, unsigned index
)
2163 struct tgsi_full_src_register reg
;
2165 memset(®
, 0, sizeof(reg
));
2166 reg
.Register
.File
= file
;
2167 reg
.Register
.Index
= index
;
2168 reg
.Register
.SwizzleX
= TGSI_SWIZZLE_X
;
2169 reg
.Register
.SwizzleY
= TGSI_SWIZZLE_Y
;
2170 reg
.Register
.SwizzleZ
= TGSI_SWIZZLE_Z
;
2171 reg
.Register
.SwizzleW
= TGSI_SWIZZLE_W
;
2177 * Create a tgsi_full_src_register with a swizzle such that all four
2178 * vector components have the same scalar value.
2180 static struct tgsi_full_src_register
2181 make_src_scalar_reg(enum tgsi_file_type file
, unsigned index
, unsigned component
)
2183 struct tgsi_full_src_register reg
;
2185 assert(component
>= TGSI_SWIZZLE_X
);
2186 assert(component
<= TGSI_SWIZZLE_W
);
2188 memset(®
, 0, sizeof(reg
));
2189 reg
.Register
.File
= file
;
2190 reg
.Register
.Index
= index
;
2191 reg
.Register
.SwizzleX
=
2192 reg
.Register
.SwizzleY
=
2193 reg
.Register
.SwizzleZ
=
2194 reg
.Register
.SwizzleW
= component
;
2200 * Create a tgsi_full_src_register for a temporary.
2202 static struct tgsi_full_src_register
2203 make_src_temp_reg(unsigned index
)
2205 return make_src_reg(TGSI_FILE_TEMPORARY
, index
);
2210 * Create a tgsi_full_src_register for a constant.
2212 static struct tgsi_full_src_register
2213 make_src_const_reg(unsigned index
)
2215 return make_src_reg(TGSI_FILE_CONSTANT
, index
);
2220 * Create a tgsi_full_src_register for an immediate constant.
2222 static struct tgsi_full_src_register
2223 make_src_immediate_reg(unsigned index
)
2225 return make_src_reg(TGSI_FILE_IMMEDIATE
, index
);
2230 * Create a tgsi_full_dst_register.
2232 static struct tgsi_full_dst_register
2233 make_dst_reg(enum tgsi_file_type file
, unsigned index
)
2235 struct tgsi_full_dst_register reg
;
2237 memset(®
, 0, sizeof(reg
));
2238 reg
.Register
.File
= file
;
2239 reg
.Register
.Index
= index
;
2240 reg
.Register
.WriteMask
= TGSI_WRITEMASK_XYZW
;
2246 * Create a tgsi_full_dst_register for a temporary.
2248 static struct tgsi_full_dst_register
2249 make_dst_temp_reg(unsigned index
)
2251 return make_dst_reg(TGSI_FILE_TEMPORARY
, index
);
2256 * Create a tgsi_full_dst_register for an output.
2258 static struct tgsi_full_dst_register
2259 make_dst_output_reg(unsigned index
)
2261 return make_dst_reg(TGSI_FILE_OUTPUT
, index
);
2266 * Create negated tgsi_full_src_register.
2268 static struct tgsi_full_src_register
2269 negate_src(const struct tgsi_full_src_register
*reg
)
2271 struct tgsi_full_src_register neg
= *reg
;
2272 neg
.Register
.Negate
= !reg
->Register
.Negate
;
2277 * Create absolute value of a tgsi_full_src_register.
2279 static struct tgsi_full_src_register
2280 absolute_src(const struct tgsi_full_src_register
*reg
)
2282 struct tgsi_full_src_register absolute
= *reg
;
2283 absolute
.Register
.Absolute
= 1;
2288 /** Return the named swizzle term from the src register */
2289 static inline unsigned
2290 get_swizzle(const struct tgsi_full_src_register
*reg
, enum tgsi_swizzle term
)
2293 case TGSI_SWIZZLE_X
:
2294 return reg
->Register
.SwizzleX
;
2295 case TGSI_SWIZZLE_Y
:
2296 return reg
->Register
.SwizzleY
;
2297 case TGSI_SWIZZLE_Z
:
2298 return reg
->Register
.SwizzleZ
;
2299 case TGSI_SWIZZLE_W
:
2300 return reg
->Register
.SwizzleW
;
2302 assert(!"Bad swizzle");
2303 return TGSI_SWIZZLE_X
;
2309 * Create swizzled tgsi_full_src_register.
2311 static struct tgsi_full_src_register
2312 swizzle_src(const struct tgsi_full_src_register
*reg
,
2313 enum tgsi_swizzle swizzleX
, enum tgsi_swizzle swizzleY
,
2314 enum tgsi_swizzle swizzleZ
, enum tgsi_swizzle swizzleW
)
2316 struct tgsi_full_src_register swizzled
= *reg
;
2317 /* Note: we swizzle the current swizzle */
2318 swizzled
.Register
.SwizzleX
= get_swizzle(reg
, swizzleX
);
2319 swizzled
.Register
.SwizzleY
= get_swizzle(reg
, swizzleY
);
2320 swizzled
.Register
.SwizzleZ
= get_swizzle(reg
, swizzleZ
);
2321 swizzled
.Register
.SwizzleW
= get_swizzle(reg
, swizzleW
);
2327 * Create swizzled tgsi_full_src_register where all the swizzle
2328 * terms are the same.
2330 static struct tgsi_full_src_register
2331 scalar_src(const struct tgsi_full_src_register
*reg
, enum tgsi_swizzle swizzle
)
2333 struct tgsi_full_src_register swizzled
= *reg
;
2334 /* Note: we swizzle the current swizzle */
2335 swizzled
.Register
.SwizzleX
=
2336 swizzled
.Register
.SwizzleY
=
2337 swizzled
.Register
.SwizzleZ
=
2338 swizzled
.Register
.SwizzleW
= get_swizzle(reg
, swizzle
);
2344 * Create new tgsi_full_dst_register with writemask.
2345 * \param mask bitmask of TGSI_WRITEMASK_[XYZW]
2347 static struct tgsi_full_dst_register
2348 writemask_dst(const struct tgsi_full_dst_register
*reg
, unsigned mask
)
2350 struct tgsi_full_dst_register masked
= *reg
;
2351 masked
.Register
.WriteMask
= mask
;
2357 * Check if the register's swizzle is XXXX, YYYY, ZZZZ, or WWWW.
2360 same_swizzle_terms(const struct tgsi_full_src_register
*reg
)
2362 return (reg
->Register
.SwizzleX
== reg
->Register
.SwizzleY
&&
2363 reg
->Register
.SwizzleY
== reg
->Register
.SwizzleZ
&&
2364 reg
->Register
.SwizzleZ
== reg
->Register
.SwizzleW
);
2369 * Search the vector for the value 'x' and return its position.
2372 find_imm_in_vec4(const union tgsi_immediate_data vec
[4],
2373 union tgsi_immediate_data x
)
2376 for (i
= 0; i
< 4; i
++) {
2377 if (vec
[i
].Int
== x
.Int
)
2385 * Helper used by make_immediate_reg(), make_immediate_reg_4().
2388 find_immediate(struct svga_shader_emitter_v10
*emit
,
2389 union tgsi_immediate_data x
, unsigned startIndex
)
2391 const unsigned endIndex
= emit
->num_immediates
;
2394 assert(emit
->immediates_emitted
);
2396 /* Search immediates for x, y, z, w */
2397 for (i
= startIndex
; i
< endIndex
; i
++) {
2398 if (x
.Int
== emit
->immediates
[i
][0].Int
||
2399 x
.Int
== emit
->immediates
[i
][1].Int
||
2400 x
.Int
== emit
->immediates
[i
][2].Int
||
2401 x
.Int
== emit
->immediates
[i
][3].Int
) {
2405 /* Should never try to use an immediate value that wasn't pre-declared */
2406 assert(!"find_immediate() failed!");
2412 * As above, but search for a double[2] pair.
2415 find_immediate_dbl(struct svga_shader_emitter_v10
*emit
,
2418 const unsigned endIndex
= emit
->num_immediates
;
2421 assert(emit
->immediates_emitted
);
2423 /* Search immediates for x, y, z, w */
2424 for (i
= 0; i
< endIndex
; i
++) {
2425 if (x
== emit
->immediates_dbl
[i
][0] &&
2426 y
== emit
->immediates_dbl
[i
][1]) {
2430 /* Should never try to use an immediate value that wasn't pre-declared */
2431 assert(!"find_immediate_dbl() failed!");
2438 * Return a tgsi_full_src_register for an immediate/literal
2439 * union tgsi_immediate_data[4] value.
2440 * Note: the values must have been previously declared/allocated in
2441 * emit_pre_helpers(). And, all of x,y,z,w must be located in the same
2444 static struct tgsi_full_src_register
2445 make_immediate_reg_4(struct svga_shader_emitter_v10
*emit
,
2446 const union tgsi_immediate_data imm
[4])
2448 struct tgsi_full_src_register reg
;
2451 for (i
= 0; i
< emit
->num_common_immediates
; i
++) {
2452 /* search for first component value */
2453 int immpos
= find_immediate(emit
, imm
[0], i
);
2456 assert(immpos
>= 0);
2458 /* find remaining components within the immediate vector */
2459 x
= find_imm_in_vec4(emit
->immediates
[immpos
], imm
[0]);
2460 y
= find_imm_in_vec4(emit
->immediates
[immpos
], imm
[1]);
2461 z
= find_imm_in_vec4(emit
->immediates
[immpos
], imm
[2]);
2462 w
= find_imm_in_vec4(emit
->immediates
[immpos
], imm
[3]);
2464 if (x
>=0 && y
>= 0 && z
>= 0 && w
>= 0) {
2465 /* found them all */
2466 memset(®
, 0, sizeof(reg
));
2467 reg
.Register
.File
= TGSI_FILE_IMMEDIATE
;
2468 reg
.Register
.Index
= immpos
;
2469 reg
.Register
.SwizzleX
= x
;
2470 reg
.Register
.SwizzleY
= y
;
2471 reg
.Register
.SwizzleZ
= z
;
2472 reg
.Register
.SwizzleW
= w
;
2475 /* else, keep searching */
2478 assert(!"Failed to find immediate register!");
2480 /* Just return IMM[0].xxxx */
2481 memset(®
, 0, sizeof(reg
));
2482 reg
.Register
.File
= TGSI_FILE_IMMEDIATE
;
2488 * Return a tgsi_full_src_register for an immediate/literal
2489 * union tgsi_immediate_data value of the form {value, value, value, value}.
2490 * \sa make_immediate_reg_4() regarding allowed values.
2492 static struct tgsi_full_src_register
2493 make_immediate_reg(struct svga_shader_emitter_v10
*emit
,
2494 union tgsi_immediate_data value
)
2496 struct tgsi_full_src_register reg
;
2497 int immpos
= find_immediate(emit
, value
, 0);
2499 assert(immpos
>= 0);
2501 memset(®
, 0, sizeof(reg
));
2502 reg
.Register
.File
= TGSI_FILE_IMMEDIATE
;
2503 reg
.Register
.Index
= immpos
;
2504 reg
.Register
.SwizzleX
=
2505 reg
.Register
.SwizzleY
=
2506 reg
.Register
.SwizzleZ
=
2507 reg
.Register
.SwizzleW
= find_imm_in_vec4(emit
->immediates
[immpos
], value
);
2514 * Return a tgsi_full_src_register for an immediate/literal float[4] value.
2515 * \sa make_immediate_reg_4() regarding allowed values.
2517 static struct tgsi_full_src_register
2518 make_immediate_reg_float4(struct svga_shader_emitter_v10
*emit
,
2519 float x
, float y
, float z
, float w
)
2521 union tgsi_immediate_data imm
[4];
2526 return make_immediate_reg_4(emit
, imm
);
2531 * Return a tgsi_full_src_register for an immediate/literal float value
2532 * of the form {value, value, value, value}.
2533 * \sa make_immediate_reg_4() regarding allowed values.
2535 static struct tgsi_full_src_register
2536 make_immediate_reg_float(struct svga_shader_emitter_v10
*emit
, float value
)
2538 union tgsi_immediate_data imm
;
2540 return make_immediate_reg(emit
, imm
);
2545 * Return a tgsi_full_src_register for an immediate/literal int[4] vector.
2547 static struct tgsi_full_src_register
2548 make_immediate_reg_int4(struct svga_shader_emitter_v10
*emit
,
2549 int x
, int y
, int z
, int w
)
2551 union tgsi_immediate_data imm
[4];
2556 return make_immediate_reg_4(emit
, imm
);
2561 * Return a tgsi_full_src_register for an immediate/literal int value
2562 * of the form {value, value, value, value}.
2563 * \sa make_immediate_reg_4() regarding allowed values.
2565 static struct tgsi_full_src_register
2566 make_immediate_reg_int(struct svga_shader_emitter_v10
*emit
, int value
)
2568 union tgsi_immediate_data imm
;
2570 return make_immediate_reg(emit
, imm
);
2574 static struct tgsi_full_src_register
2575 make_immediate_reg_double(struct svga_shader_emitter_v10
*emit
, double value
)
2577 struct tgsi_full_src_register reg
;
2578 int immpos
= find_immediate_dbl(emit
, value
, value
);
2580 assert(immpos
>= 0);
2582 memset(®
, 0, sizeof(reg
));
2583 reg
.Register
.File
= TGSI_FILE_IMMEDIATE
;
2584 reg
.Register
.Index
= immpos
;
2585 reg
.Register
.SwizzleX
= TGSI_SWIZZLE_X
;
2586 reg
.Register
.SwizzleY
= TGSI_SWIZZLE_Y
;
2587 reg
.Register
.SwizzleZ
= TGSI_SWIZZLE_Z
;
2588 reg
.Register
.SwizzleW
= TGSI_SWIZZLE_W
;
2595 * Allocate space for a union tgsi_immediate_data[4] immediate.
2596 * \return the index/position of the immediate.
2599 alloc_immediate_4(struct svga_shader_emitter_v10
*emit
,
2600 const union tgsi_immediate_data imm
[4])
2602 unsigned n
= emit
->num_immediates
++;
2603 assert(!emit
->immediates_emitted
);
2604 assert(n
< ARRAY_SIZE(emit
->immediates
));
2605 emit
->immediates
[n
][0] = imm
[0];
2606 emit
->immediates
[n
][1] = imm
[1];
2607 emit
->immediates
[n
][2] = imm
[2];
2608 emit
->immediates
[n
][3] = imm
[3];
2614 * Allocate space for a float[4] immediate.
2615 * \return the index/position of the immediate.
2618 alloc_immediate_float4(struct svga_shader_emitter_v10
*emit
,
2619 float x
, float y
, float z
, float w
)
2621 union tgsi_immediate_data imm
[4];
2626 return alloc_immediate_4(emit
, imm
);
2631 * Allocate space for an int[4] immediate.
2632 * \return the index/position of the immediate.
2635 alloc_immediate_int4(struct svga_shader_emitter_v10
*emit
,
2636 int x
, int y
, int z
, int w
)
2638 union tgsi_immediate_data imm
[4];
2643 return alloc_immediate_4(emit
, imm
);
2648 alloc_immediate_double2(struct svga_shader_emitter_v10
*emit
,
2651 unsigned n
= emit
->num_immediates
++;
2652 assert(!emit
->immediates_emitted
);
2653 assert(n
< ARRAY_SIZE(emit
->immediates
));
2654 emit
->immediates_dbl
[n
][0] = x
;
2655 emit
->immediates_dbl
[n
][1] = y
;
2662 * Allocate a shader input to store a system value.
2665 alloc_system_value_index(struct svga_shader_emitter_v10
*emit
, unsigned index
)
2667 const unsigned n
= emit
->linkage
.input_map_max
+ 1 + index
;
2668 assert(index
< ARRAY_SIZE(emit
->system_value_indexes
));
2669 emit
->system_value_indexes
[index
] = n
;
2675 * Translate a TGSI immediate value (union tgsi_immediate_data[4]) to VGPU10.
2678 emit_vgpu10_immediate(struct svga_shader_emitter_v10
*emit
,
2679 const struct tgsi_full_immediate
*imm
)
2681 /* We don't actually emit any code here. We just save the
2682 * immediate values and emit them later.
2684 alloc_immediate_4(emit
, imm
->u
);
2690 * Emit a VGPU10_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER block
2691 * containing all the immediate values previously allocated
2692 * with alloc_immediate_4().
2695 emit_vgpu10_immediates_block(struct svga_shader_emitter_v10
*emit
)
2697 VGPU10OpcodeToken0 token
;
2699 assert(!emit
->immediates_emitted
);
2702 token
.opcodeType
= VGPU10_OPCODE_CUSTOMDATA
;
2703 token
.customDataClass
= VGPU10_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER
;
2705 /* Note: no begin/end_emit_instruction() calls */
2706 emit_dword(emit
, token
.value
);
2707 emit_dword(emit
, 2 + 4 * emit
->num_immediates
);
2708 emit_dwords(emit
, (unsigned *) emit
->immediates
, 4 * emit
->num_immediates
);
2710 emit
->immediates_emitted
= TRUE
;
2717 * Translate a fragment shader's TGSI_INTERPOLATE_x mode to a vgpu10
2718 * interpolation mode.
2719 * \return a VGPU10_INTERPOLATION_x value
2722 translate_interpolation(const struct svga_shader_emitter_v10
*emit
,
2723 enum tgsi_interpolate_mode interp
,
2724 enum tgsi_interpolate_loc interpolate_loc
)
2726 if (interp
== TGSI_INTERPOLATE_COLOR
) {
2727 interp
= emit
->key
.fs
.flatshade
?
2728 TGSI_INTERPOLATE_CONSTANT
: TGSI_INTERPOLATE_PERSPECTIVE
;
2732 case TGSI_INTERPOLATE_CONSTANT
:
2733 return VGPU10_INTERPOLATION_CONSTANT
;
2734 case TGSI_INTERPOLATE_LINEAR
:
2735 if (interpolate_loc
== TGSI_INTERPOLATE_LOC_CENTROID
) {
2736 return VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID
;
2737 } else if (interpolate_loc
== TGSI_INTERPOLATE_LOC_SAMPLE
&&
2738 emit
->version
>= 41) {
2739 return VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE
;
2741 return VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE
;
2744 case TGSI_INTERPOLATE_PERSPECTIVE
:
2745 if (interpolate_loc
== TGSI_INTERPOLATE_LOC_CENTROID
) {
2746 return VGPU10_INTERPOLATION_LINEAR_CENTROID
;
2747 } else if (interpolate_loc
== TGSI_INTERPOLATE_LOC_SAMPLE
&&
2748 emit
->version
>= 41) {
2749 return VGPU10_INTERPOLATION_LINEAR_SAMPLE
;
2751 return VGPU10_INTERPOLATION_LINEAR
;
2755 assert(!"Unexpected interpolation mode");
2756 return VGPU10_INTERPOLATION_CONSTANT
;
2762 * Translate a TGSI property to VGPU10.
2763 * Don't emit any instructions yet, only need to gather the primitive property
2764 * information. The output primitive topology might be changed later. The
2765 * final property instructions will be emitted as part of the pre-helper code.
2768 emit_vgpu10_property(struct svga_shader_emitter_v10
*emit
,
2769 const struct tgsi_full_property
*prop
)
2771 static const VGPU10_PRIMITIVE primType
[] = {
2772 VGPU10_PRIMITIVE_POINT
, /* PIPE_PRIM_POINTS */
2773 VGPU10_PRIMITIVE_LINE
, /* PIPE_PRIM_LINES */
2774 VGPU10_PRIMITIVE_LINE
, /* PIPE_PRIM_LINE_LOOP */
2775 VGPU10_PRIMITIVE_LINE
, /* PIPE_PRIM_LINE_STRIP */
2776 VGPU10_PRIMITIVE_TRIANGLE
, /* PIPE_PRIM_TRIANGLES */
2777 VGPU10_PRIMITIVE_TRIANGLE
, /* PIPE_PRIM_TRIANGLE_STRIP */
2778 VGPU10_PRIMITIVE_TRIANGLE
, /* PIPE_PRIM_TRIANGLE_FAN */
2779 VGPU10_PRIMITIVE_UNDEFINED
, /* PIPE_PRIM_QUADS */
2780 VGPU10_PRIMITIVE_UNDEFINED
, /* PIPE_PRIM_QUAD_STRIP */
2781 VGPU10_PRIMITIVE_UNDEFINED
, /* PIPE_PRIM_POLYGON */
2782 VGPU10_PRIMITIVE_LINE_ADJ
, /* PIPE_PRIM_LINES_ADJACENCY */
2783 VGPU10_PRIMITIVE_LINE_ADJ
, /* PIPE_PRIM_LINE_STRIP_ADJACENCY */
2784 VGPU10_PRIMITIVE_TRIANGLE_ADJ
, /* PIPE_PRIM_TRIANGLES_ADJACENCY */
2785 VGPU10_PRIMITIVE_TRIANGLE_ADJ
/* PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY */
2788 static const VGPU10_PRIMITIVE_TOPOLOGY primTopology
[] = {
2789 VGPU10_PRIMITIVE_TOPOLOGY_POINTLIST
, /* PIPE_PRIM_POINTS */
2790 VGPU10_PRIMITIVE_TOPOLOGY_LINELIST
, /* PIPE_PRIM_LINES */
2791 VGPU10_PRIMITIVE_TOPOLOGY_LINELIST
, /* PIPE_PRIM_LINE_LOOP */
2792 VGPU10_PRIMITIVE_TOPOLOGY_LINESTRIP
, /* PIPE_PRIM_LINE_STRIP */
2793 VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLELIST
, /* PIPE_PRIM_TRIANGLES */
2794 VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP
, /* PIPE_PRIM_TRIANGLE_STRIP */
2795 VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP
, /* PIPE_PRIM_TRIANGLE_FAN */
2796 VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED
, /* PIPE_PRIM_QUADS */
2797 VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED
, /* PIPE_PRIM_QUAD_STRIP */
2798 VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED
, /* PIPE_PRIM_POLYGON */
2799 VGPU10_PRIMITIVE_TOPOLOGY_LINELIST_ADJ
, /* PIPE_PRIM_LINES_ADJACENCY */
2800 VGPU10_PRIMITIVE_TOPOLOGY_LINELIST_ADJ
, /* PIPE_PRIM_LINE_STRIP_ADJACENCY */
2801 VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ
, /* PIPE_PRIM_TRIANGLES_ADJACENCY */
2802 VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ
/* PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY */
2805 static const unsigned inputArraySize
[] = {
2806 0, /* VGPU10_PRIMITIVE_UNDEFINED */
2807 1, /* VGPU10_PRIMITIVE_POINT */
2808 2, /* VGPU10_PRIMITIVE_LINE */
2809 3, /* VGPU10_PRIMITIVE_TRIANGLE */
2812 4, /* VGPU10_PRIMITIVE_LINE_ADJ */
2813 6 /* VGPU10_PRIMITIVE_TRIANGLE_ADJ */
2816 switch (prop
->Property
.PropertyName
) {
2817 case TGSI_PROPERTY_GS_INPUT_PRIM
:
2818 assert(prop
->u
[0].Data
< ARRAY_SIZE(primType
));
2819 emit
->gs
.prim_type
= primType
[prop
->u
[0].Data
];
2820 assert(emit
->gs
.prim_type
!= VGPU10_PRIMITIVE_UNDEFINED
);
2821 emit
->gs
.input_size
= inputArraySize
[emit
->gs
.prim_type
];
2824 case TGSI_PROPERTY_GS_OUTPUT_PRIM
:
2825 assert(prop
->u
[0].Data
< ARRAY_SIZE(primTopology
));
2826 emit
->gs
.prim_topology
= primTopology
[prop
->u
[0].Data
];
2827 assert(emit
->gs
.prim_topology
!= VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED
);
2830 case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES
:
2831 emit
->gs
.max_out_vertices
= prop
->u
[0].Data
;
2834 case TGSI_PROPERTY_GS_INVOCATIONS
:
2835 emit
->gs
.invocations
= prop
->u
[0].Data
;
2838 case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS
:
2839 case TGSI_PROPERTY_NEXT_SHADER
:
2840 case TGSI_PROPERTY_NUM_CLIPDIST_ENABLED
:
2844 case TGSI_PROPERTY_TCS_VERTICES_OUT
:
2845 /* This info is already captured in the shader key */
2848 case TGSI_PROPERTY_TES_PRIM_MODE
:
2849 emit
->tes
.prim_mode
= prop
->u
[0].Data
;
2852 case TGSI_PROPERTY_TES_SPACING
:
2853 emit
->tes
.spacing
= prop
->u
[0].Data
;
2856 case TGSI_PROPERTY_TES_VERTEX_ORDER_CW
:
2857 emit
->tes
.vertices_order_cw
= prop
->u
[0].Data
;
2860 case TGSI_PROPERTY_TES_POINT_MODE
:
2861 emit
->tes
.point_mode
= prop
->u
[0].Data
;
2865 debug_printf("Unexpected TGSI property %s\n",
2866 tgsi_property_names
[prop
->Property
.PropertyName
]);
2874 emit_property_instruction(struct svga_shader_emitter_v10
*emit
,
2875 VGPU10OpcodeToken0 opcode0
, unsigned nData
,
2878 begin_emit_instruction(emit
);
2879 emit_dword(emit
, opcode0
.value
);
2881 emit_dword(emit
, data
);
2882 end_emit_instruction(emit
);
2887 * Emit property instructions
2890 emit_property_instructions(struct svga_shader_emitter_v10
*emit
)
2892 VGPU10OpcodeToken0 opcode0
;
2894 assert(emit
->unit
== PIPE_SHADER_GEOMETRY
);
2896 /* emit input primitive type declaration */
2898 opcode0
.opcodeType
= VGPU10_OPCODE_DCL_GS_INPUT_PRIMITIVE
;
2899 opcode0
.primitive
= emit
->gs
.prim_type
;
2900 emit_property_instruction(emit
, opcode0
, 0, 0);
2902 /* emit max output vertices */
2904 opcode0
.opcodeType
= VGPU10_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT
;
2905 emit_property_instruction(emit
, opcode0
, 1, emit
->gs
.max_out_vertices
);
2907 if (emit
->version
>= 50 && emit
->gs
.invocations
> 0) {
2909 opcode0
.opcodeType
= VGPU10_OPCODE_DCL_GS_INSTANCE_COUNT
;
2910 emit_property_instruction(emit
, opcode0
, 1, emit
->gs
.invocations
);
2916 * A helper function to declare tessellator domain in a hull shader or
2917 * in the domain shader.
2920 emit_tessellator_domain(struct svga_shader_emitter_v10
*emit
,
2921 enum pipe_prim_type prim_mode
)
2923 VGPU10OpcodeToken0 opcode0
;
2926 opcode0
.opcodeType
= VGPU10_OPCODE_DCL_TESS_DOMAIN
;
2927 switch (prim_mode
) {
2928 case PIPE_PRIM_QUADS
:
2929 case PIPE_PRIM_LINES
:
2930 opcode0
.tessDomain
= VGPU10_TESSELLATOR_DOMAIN_QUAD
;
2932 case PIPE_PRIM_TRIANGLES
:
2933 opcode0
.tessDomain
= VGPU10_TESSELLATOR_DOMAIN_TRI
;
2936 debug_printf("Invalid tessellator prim mode %d\n", prim_mode
);
2937 opcode0
.tessDomain
= VGPU10_TESSELLATOR_DOMAIN_UNDEFINED
;
2939 begin_emit_instruction(emit
);
2940 emit_dword(emit
, opcode0
.value
);
2941 end_emit_instruction(emit
);
2946 * Emit domain shader declarations.
2949 emit_domain_shader_declarations(struct svga_shader_emitter_v10
*emit
)
2951 VGPU10OpcodeToken0 opcode0
;
2953 assert(emit
->unit
== PIPE_SHADER_TESS_EVAL
);
2955 /* Emit the input control point count */
2956 assert(emit
->key
.tes
.vertices_per_patch
>= 0 &&
2957 emit
->key
.tes
.vertices_per_patch
<= 32);
2960 opcode0
.opcodeType
= VGPU10_OPCODE_DCL_INPUT_CONTROL_POINT_COUNT
;
2961 opcode0
.controlPointCount
= emit
->key
.tes
.vertices_per_patch
;
2962 begin_emit_instruction(emit
);
2963 emit_dword(emit
, opcode0
.value
);
2964 end_emit_instruction(emit
);
2966 emit_tessellator_domain(emit
, emit
->tes
.prim_mode
);
2971 * Some common values like 0.0, 1.0, 0.5, etc. are frequently needed
2972 * to implement some instructions. We pre-allocate those values here
2973 * in the immediate constant buffer.
2976 alloc_common_immediates(struct svga_shader_emitter_v10
*emit
)
2980 emit
->common_immediate_pos
[n
++] =
2981 alloc_immediate_float4(emit
, 0.0f
, 1.0f
, 0.5f
, -1.0f
);
2983 if (emit
->info
.opcode_count
[TGSI_OPCODE_LIT
] > 0) {
2984 emit
->common_immediate_pos
[n
++] =
2985 alloc_immediate_float4(emit
, 128.0f
, -128.0f
, 0.0f
, 0.0f
);
2988 emit
->common_immediate_pos
[n
++] =
2989 alloc_immediate_int4(emit
, 0, 1, 0, -1);
2991 if (emit
->info
.opcode_count
[TGSI_OPCODE_IMSB
] > 0 ||
2992 emit
->info
.opcode_count
[TGSI_OPCODE_UMSB
] > 0) {
2993 emit
->common_immediate_pos
[n
++] =
2994 alloc_immediate_int4(emit
, 31, 0, 0, 0);
2997 if (emit
->info
.opcode_count
[TGSI_OPCODE_UBFE
] > 0 ||
2998 emit
->info
.opcode_count
[TGSI_OPCODE_IBFE
] > 0 ||
2999 emit
->info
.opcode_count
[TGSI_OPCODE_BFI
] > 0) {
3000 emit
->common_immediate_pos
[n
++] =
3001 alloc_immediate_int4(emit
, 32, 0, 0, 0);
3004 if (emit
->key
.vs
.attrib_puint_to_snorm
) {
3005 emit
->common_immediate_pos
[n
++] =
3006 alloc_immediate_float4(emit
, -2.0f
, 2.0f
, 3.0f
, -1.66666f
);
3009 if (emit
->key
.vs
.attrib_puint_to_uscaled
) {
3010 emit
->common_immediate_pos
[n
++] =
3011 alloc_immediate_float4(emit
, 1023.0f
, 3.0f
, 0.0f
, 0.0f
);
3014 if (emit
->key
.vs
.attrib_puint_to_sscaled
) {
3015 emit
->common_immediate_pos
[n
++] =
3016 alloc_immediate_int4(emit
, 22, 12, 2, 0);
3018 emit
->common_immediate_pos
[n
++] =
3019 alloc_immediate_int4(emit
, 22, 30, 0, 0);
3022 if (emit
->vposition
.num_prescale
> 1) {
3024 for (i
= 0; i
< emit
->vposition
.num_prescale
; i
+=4) {
3025 emit
->common_immediate_pos
[n
++] =
3026 alloc_immediate_int4(emit
, i
, i
+1, i
+2, i
+3);
3030 emit
->immediates_dbl
= (double (*)[2]) emit
->immediates
;
3032 if (emit
->info
.opcode_count
[TGSI_OPCODE_DNEG
] > 0) {
3033 emit
->common_immediate_pos
[n
++] =
3034 alloc_immediate_double2(emit
, -1.0, -1.0);
3037 if (emit
->info
.opcode_count
[TGSI_OPCODE_DSQRT
] > 0) {
3038 emit
->common_immediate_pos
[n
++] =
3039 alloc_immediate_double2(emit
, 0.0, 0.0);
3040 emit
->common_immediate_pos
[n
++] =
3041 alloc_immediate_double2(emit
, 1.0, 1.0);
3044 if (emit
->info
.opcode_count
[TGSI_OPCODE_INTERP_OFFSET
] > 0) {
3045 emit
->common_immediate_pos
[n
++] =
3046 alloc_immediate_float4(emit
, 16.0f
, -16.0f
, 0.0, 0.0);
3049 assert(n
<= ARRAY_SIZE(emit
->common_immediate_pos
));
3053 for (i
= 0; i
< PIPE_MAX_SAMPLERS
; i
++) {
3054 if (emit
->key
.tex
[i
].texel_bias
) {
3055 /* Replace 0.0f if more immediate float value is needed */
3056 emit
->common_immediate_pos
[n
++] =
3057 alloc_immediate_float4(emit
, 0.0001f
, 0.0f
, 0.0f
, 0.0f
);
3062 assert(n
<= ARRAY_SIZE(emit
->common_immediate_pos
));
3063 emit
->num_common_immediates
= n
;
3068 * Emit hull shader declarations.
3071 emit_hull_shader_declarations(struct svga_shader_emitter_v10
*emit
)
3073 VGPU10OpcodeToken0 opcode0
;
3075 /* Emit the input control point count */
3076 assert(emit
->key
.tcs
.vertices_per_patch
> 0 &&
3077 emit
->key
.tcs
.vertices_per_patch
<= 32);
3080 opcode0
.opcodeType
= VGPU10_OPCODE_DCL_INPUT_CONTROL_POINT_COUNT
;
3081 opcode0
.controlPointCount
= emit
->key
.tcs
.vertices_per_patch
;
3082 begin_emit_instruction(emit
);
3083 emit_dword(emit
, opcode0
.value
);
3084 end_emit_instruction(emit
);
3086 /* Emit the output control point count */
3087 assert(emit
->key
.tcs
.vertices_out
>= 0 && emit
->key
.tcs
.vertices_out
<= 32);
3090 opcode0
.opcodeType
= VGPU10_OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT
;
3091 opcode0
.controlPointCount
= emit
->key
.tcs
.vertices_out
;
3092 begin_emit_instruction(emit
);
3093 emit_dword(emit
, opcode0
.value
);
3094 end_emit_instruction(emit
);
3096 /* Emit tessellator domain */
3097 emit_tessellator_domain(emit
, emit
->key
.tcs
.prim_mode
);
3099 /* Emit tessellator output primitive */
3101 opcode0
.opcodeType
= VGPU10_OPCODE_DCL_TESS_OUTPUT_PRIMITIVE
;
3102 if (emit
->key
.tcs
.point_mode
) {
3103 opcode0
.tessOutputPrimitive
= VGPU10_TESSELLATOR_OUTPUT_POINT
;
3105 else if (emit
->key
.tcs
.prim_mode
== PIPE_PRIM_LINES
) {
3106 opcode0
.tessOutputPrimitive
= VGPU10_TESSELLATOR_OUTPUT_LINE
;
3109 assert(emit
->key
.tcs
.prim_mode
== PIPE_PRIM_QUADS
||
3110 emit
->key
.tcs
.prim_mode
== PIPE_PRIM_TRIANGLES
);
3112 if (emit
->key
.tcs
.vertices_order_cw
)
3113 opcode0
.tessOutputPrimitive
= VGPU10_TESSELLATOR_OUTPUT_TRIANGLE_CCW
;
3115 opcode0
.tessOutputPrimitive
= VGPU10_TESSELLATOR_OUTPUT_TRIANGLE_CW
;
3117 begin_emit_instruction(emit
);
3118 emit_dword(emit
, opcode0
.value
);
3119 end_emit_instruction(emit
);
3121 /* Emit tessellator partitioning */
3123 opcode0
.opcodeType
= VGPU10_OPCODE_DCL_TESS_PARTITIONING
;
3124 switch (emit
->key
.tcs
.spacing
) {
3125 case PIPE_TESS_SPACING_FRACTIONAL_ODD
:
3126 opcode0
.tessPartitioning
= VGPU10_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD
;
3128 case PIPE_TESS_SPACING_FRACTIONAL_EVEN
:
3129 opcode0
.tessPartitioning
= VGPU10_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN
;
3131 case PIPE_TESS_SPACING_EQUAL
:
3132 opcode0
.tessPartitioning
= VGPU10_TESSELLATOR_PARTITIONING_INTEGER
;
3135 debug_printf("invalid tessellator spacing %d\n", emit
->key
.tcs
.spacing
);
3136 opcode0
.tessPartitioning
= VGPU10_TESSELLATOR_PARTITIONING_UNDEFINED
;
3138 begin_emit_instruction(emit
);
3139 emit_dword(emit
, opcode0
.value
);
3140 end_emit_instruction(emit
);
3142 /* Declare constant registers */
3143 emit_constant_declaration(emit
);
3145 /* Declare samplers and resources */
3146 emit_sampler_declarations(emit
);
3147 emit_resource_declarations(emit
);
3149 alloc_common_immediates(emit
);
3151 int nVertices
= emit
->key
.tcs
.vertices_per_patch
;
3152 emit
->tcs
.imm_index
=
3153 alloc_immediate_int4(emit
, nVertices
, nVertices
, nVertices
, 0);
3155 /* Now, emit the constant block containing all the immediates
3156 * declared by shader, as well as the extra ones seen above.
3158 emit_vgpu10_immediates_block(emit
);
3164 * A helper function to determine if control point phase is needed.
3165 * Returns TRUE if there is control point output.
3168 needs_control_point_phase(struct svga_shader_emitter_v10
*emit
)
3172 assert(emit
->unit
== PIPE_SHADER_TESS_CTRL
);
3174 /* If output control point count does not match the input count,
3175 * we need a control point phase to explicitly set the output control
3178 if ((emit
->key
.tcs
.vertices_per_patch
!= emit
->key
.tcs
.vertices_out
) &&
3179 emit
->key
.tcs
.vertices_out
)
3182 for (i
= 0; i
< emit
->info
.num_outputs
; i
++) {
3183 switch (emit
->info
.output_semantic_name
[i
]) {
3184 case TGSI_SEMANTIC_PATCH
:
3185 case TGSI_SEMANTIC_TESSOUTER
:
3186 case TGSI_SEMANTIC_TESSINNER
:
3197 * A helper function to add shader signature for passthrough control point
3198 * phase. This signature is also generated for passthrough control point
3199 * phase from HLSL compiler and is needed by Metal Renderer.
3202 emit_passthrough_control_point_signature(struct svga_shader_emitter_v10
*emit
)
3204 struct svga_shader_signature
*sgn
= &emit
->signature
;
3205 SVGA3dDXShaderSignatureEntry
*sgnEntry
;
3208 for (i
= 0; i
< emit
->info
.num_inputs
; i
++) {
3209 unsigned index
= emit
->linkage
.input_map
[i
];
3210 enum tgsi_semantic sem_name
= emit
->info
.input_semantic_name
[i
];
3212 sgnEntry
= &sgn
->inputs
[sgn
->header
.numInputSignatures
++];
3214 set_shader_signature_entry(sgnEntry
, index
,
3215 tgsi_semantic_to_sgn_name
[sem_name
],
3216 VGPU10_OPERAND_4_COMPONENT_MASK_ALL
,
3217 SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN
,
3218 SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT
);
3220 sgnEntry
= &sgn
->outputs
[sgn
->header
.numOutputSignatures
++];
3222 set_shader_signature_entry(sgnEntry
, i
,
3223 tgsi_semantic_to_sgn_name
[sem_name
],
3224 VGPU10_OPERAND_4_COMPONENT_MASK_ALL
,
3225 SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN
,
3226 SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT
);
3232 * A helper function to emit an instruction to start the control point phase
3233 * in the hull shader.
3236 emit_control_point_phase_instruction(struct svga_shader_emitter_v10
*emit
)
3238 VGPU10OpcodeToken0 opcode0
;
3241 opcode0
.opcodeType
= VGPU10_OPCODE_HS_CONTROL_POINT_PHASE
;
3242 begin_emit_instruction(emit
);
3243 emit_dword(emit
, opcode0
.value
);
3244 end_emit_instruction(emit
);
3249 * Start the hull shader control point phase
3252 emit_hull_shader_control_point_phase(struct svga_shader_emitter_v10
*emit
)
3254 /* If there is no control point output, skip the control point phase. */
3255 if (!needs_control_point_phase(emit
)) {
3256 if (!emit
->key
.tcs
.vertices_out
) {
3258 * If the tcs does not explicitly generate any control point output
3259 * and the tes does not use any input control point, then
3260 * emit an empty control point phase with zero output control
3263 emit_control_point_phase_instruction(emit
);
3266 * Since this is an empty control point phase, we will need to
3267 * add input signatures when we parse the tcs again in the
3268 * patch constant phase.
3270 emit
->tcs
.fork_phase_add_signature
= TRUE
;
3274 * Before skipping the control point phase, add the signature for
3275 * the passthrough control point.
3277 emit_passthrough_control_point_signature(emit
);
3282 /* Start the control point phase in the hull shader */
3283 emit_control_point_phase_instruction(emit
);
3285 /* Declare the output control point ID */
3286 if (emit
->tcs
.invocation_id_sys_index
== INVALID_INDEX
) {
3287 /* Add invocation id declaration if it does not exist */
3288 emit
->tcs
.invocation_id_sys_index
= emit
->info
.num_system_values
+ 1;
3291 emit_input_declaration(emit
, VGPU10_OPCODE_DCL_INPUT
,
3292 VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID
,
3293 VGPU10_OPERAND_INDEX_0D
,
3295 VGPU10_NAME_UNDEFINED
,
3296 VGPU10_OPERAND_0_COMPONENT
, 0,
3298 VGPU10_INTERPOLATION_CONSTANT
, TRUE
,
3299 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED
);
3301 if (emit
->tcs
.prim_id_index
!= INVALID_INDEX
) {
3302 emit_input_declaration(emit
, VGPU10_OPCODE_DCL_INPUT
,
3303 VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID
,
3304 VGPU10_OPERAND_INDEX_0D
,
3306 VGPU10_NAME_UNDEFINED
,
3307 VGPU10_OPERAND_0_COMPONENT
,
3308 VGPU10_OPERAND_4_COMPONENT_MASK_MODE
,
3310 VGPU10_INTERPOLATION_UNDEFINED
, TRUE
,
3311 SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID
);
3319 * Start the hull shader patch constant phase and
3320 * do the second pass of the tcs translation and emit
3321 * the relevant declarations and instructions for this phase.
3324 emit_hull_shader_patch_constant_phase(struct svga_shader_emitter_v10
*emit
,
3325 struct tgsi_parse_context
*parse
)
3327 unsigned inst_number
= 0;
3329 VGPU10OpcodeToken0 opcode0
;
3331 emit
->skip_instruction
= FALSE
;
3333 /* Start the patch constant phase */
3335 opcode0
.opcodeType
= VGPU10_OPCODE_HS_FORK_PHASE
;
3336 begin_emit_instruction(emit
);
3337 emit_dword(emit
, opcode0
.value
);
3338 end_emit_instruction(emit
);
3340 /* Set the current phase to patch constant phase */
3341 emit
->tcs
.control_point_phase
= FALSE
;
3343 if (emit
->tcs
.prim_id_index
!= INVALID_INDEX
) {
3344 emit_input_declaration(emit
, VGPU10_OPCODE_DCL_INPUT
,
3345 VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID
,
3346 VGPU10_OPERAND_INDEX_0D
,
3348 VGPU10_NAME_UNDEFINED
,
3349 VGPU10_OPERAND_0_COMPONENT
,
3350 VGPU10_OPERAND_4_COMPONENT_MASK_MODE
,
3352 VGPU10_INTERPOLATION_UNDEFINED
, TRUE
,
3353 SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID
);
3356 /* Emit declarations for this phase */
3357 emit
->index_range
.required
=
3358 emit
->info
.indirect_files
& (1 << TGSI_FILE_INPUT
) ? TRUE
: FALSE
;
3359 emit_tcs_input_declarations(emit
);
3361 if (emit
->index_range
.start_index
!= INVALID_INDEX
) {
3362 emit_index_range_declaration(emit
);
3365 emit
->index_range
.required
=
3366 emit
->info
.indirect_files
& (1 << TGSI_FILE_OUTPUT
) ? TRUE
: FALSE
;
3367 emit_tcs_output_declarations(emit
);
3369 if (emit
->index_range
.start_index
!= INVALID_INDEX
) {
3370 emit_index_range_declaration(emit
);
3372 emit
->index_range
.required
= FALSE
;
3374 emit_temporaries_declaration(emit
);
3376 /* Reset the token position to the first instruction token
3377 * in preparation for the second pass of the shader
3379 parse
->Position
= emit
->tcs
.instruction_token_pos
;
3381 while (!tgsi_parse_end_of_tokens(parse
)) {
3382 tgsi_parse_token(parse
);
3384 assert(parse
->FullToken
.Token
.Type
== TGSI_TOKEN_TYPE_INSTRUCTION
);
3385 ret
= emit_vgpu10_instruction(emit
, inst_number
++,
3386 &parse
->FullToken
.FullInstruction
);
3388 /* Usually this applies to TCS only. If shader is reading output of
3389 * patch constant in fork phase, we should reemit all instructions
3390 * which are writting into ouput of patch constant in fork phase
3391 * to store results into temporaries.
3393 if (emit
->reemit_instruction
) {
3394 assert(emit
->unit
== PIPE_SHADER_TESS_CTRL
);
3395 ret
= emit_vgpu10_instruction(emit
, inst_number
,
3396 &parse
->FullToken
.FullInstruction
);
3408 * Emit index range declaration.
3411 emit_index_range_declaration(struct svga_shader_emitter_v10
*emit
)
3413 if (emit
->version
< 50)
3416 assert(emit
->index_range
.start_index
!= INVALID_INDEX
);
3417 assert(emit
->index_range
.count
!= 0);
3418 assert(emit
->index_range
.required
);
3419 assert(emit
->index_range
.operandType
!= VGPU10_NUM_OPERANDS
);
3420 assert(emit
->index_range
.dim
!= 0);
3421 assert(emit
->index_range
.size
!= 0);
3423 VGPU10OpcodeToken0 opcode0
;
3424 VGPU10OperandToken0 operand0
;
3427 opcode0
.opcodeType
= VGPU10_OPCODE_DCL_INDEX_RANGE
;
3430 operand0
.numComponents
= VGPU10_OPERAND_4_COMPONENT
;
3431 operand0
.indexDimension
= emit
->index_range
.dim
;
3432 operand0
.operandType
= emit
->index_range
.operandType
;
3433 operand0
.mask
= VGPU10_OPERAND_4_COMPONENT_MASK_ALL
;
3434 operand0
.index0Representation
= VGPU10_OPERAND_INDEX_IMMEDIATE32
;
3436 if (emit
->index_range
.dim
== VGPU10_OPERAND_INDEX_2D
)
3437 operand0
.index1Representation
= VGPU10_OPERAND_INDEX_IMMEDIATE32
;
3439 begin_emit_instruction(emit
);
3440 emit_dword(emit
, opcode0
.value
);
3441 emit_dword(emit
, operand0
.value
);
3443 if (emit
->index_range
.dim
== VGPU10_OPERAND_INDEX_2D
) {
3444 emit_dword(emit
, emit
->index_range
.size
);
3445 emit_dword(emit
, emit
->index_range
.start_index
);
3446 emit_dword(emit
, emit
->index_range
.count
);
3449 emit_dword(emit
, emit
->index_range
.start_index
);
3450 emit_dword(emit
, emit
->index_range
.count
);
3453 end_emit_instruction(emit
);
3455 /* Reset fields in emit->index_range struct except
3456 * emit->index_range.required which will be reset afterwards
3458 emit
->index_range
.count
= 0;
3459 emit
->index_range
.operandType
= VGPU10_NUM_OPERANDS
;
3460 emit
->index_range
.start_index
= INVALID_INDEX
;
3461 emit
->index_range
.size
= 0;
3462 emit
->index_range
.dim
= 0;
3469 * Emit a vgpu10 declaration "instruction".
3470 * \param index the register index
3471 * \param size array size of the operand. In most cases, it is 1,
3472 * but for inputs to geometry shader, the array size varies
3473 * depending on the primitive type.
3476 emit_decl_instruction(struct svga_shader_emitter_v10
*emit
,
3477 VGPU10OpcodeToken0 opcode0
,
3478 VGPU10OperandToken0 operand0
,
3479 VGPU10NameToken name_token
,
3480 unsigned index
, unsigned size
)
3482 assert(opcode0
.opcodeType
);
3483 assert(operand0
.mask
||
3484 (operand0
.operandType
== VGPU10_OPERAND_TYPE_OUTPUT
) ||
3485 (operand0
.operandType
== VGPU10_OPERAND_TYPE_OUTPUT_DEPTH
) ||
3486 (operand0
.operandType
== VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK
) ||
3487 (operand0
.operandType
== VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID
) ||
3488 (operand0
.operandType
== VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID
) ||
3489 (operand0
.operandType
== VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID
) ||
3490 (operand0
.operandType
== VGPU10_OPERAND_TYPE_INPUT_COVERAGE_MASK
) ||
3491 (operand0
.operandType
== VGPU10_OPERAND_TYPE_STREAM
));
3493 begin_emit_instruction(emit
);
3494 emit_dword(emit
, opcode0
.value
);
3496 emit_dword(emit
, operand0
.value
);
3498 if (operand0
.indexDimension
== VGPU10_OPERAND_INDEX_1D
) {
3499 /* Next token is the index of the register to declare */
3500 emit_dword(emit
, index
);
3502 else if (operand0
.indexDimension
>= VGPU10_OPERAND_INDEX_2D
) {
3503 /* Next token is the size of the register */
3504 emit_dword(emit
, size
);
3506 /* Followed by the index of the register */
3507 emit_dword(emit
, index
);
3510 if (name_token
.value
) {
3511 emit_dword(emit
, name_token
.value
);
3514 end_emit_instruction(emit
);
3519 * Emit the declaration for a shader input.
3520 * \param opcodeType opcode type, one of VGPU10_OPCODE_DCL_INPUTx
3521 * \param operandType operand type, one of VGPU10_OPERAND_TYPE_INPUT_x
3522 * \param dim index dimension
3523 * \param index the input register index
3524 * \param size array size of the operand. In most cases, it is 1,
3525 * but for inputs to geometry shader, the array size varies
3526 * depending on the primitive type. For tessellation control
3527 * shader, the array size is the vertex count per patch.
3528 * \param name one of VGPU10_NAME_x
3529 * \parma numComp number of components
3530 * \param selMode component selection mode
3531 * \param usageMask bitfield of VGPU10_OPERAND_4_COMPONENT_MASK_x values
3532 * \param interpMode interpolation mode
3535 emit_input_declaration(struct svga_shader_emitter_v10
*emit
,
3536 VGPU10_OPCODE_TYPE opcodeType
,
3537 VGPU10_OPERAND_TYPE operandType
,
3538 VGPU10_OPERAND_INDEX_DIMENSION dim
,
3539 unsigned index
, unsigned size
,
3540 VGPU10_SYSTEM_NAME name
,
3541 VGPU10_OPERAND_NUM_COMPONENTS numComp
,
3542 VGPU10_OPERAND_4_COMPONENT_SELECTION_MODE selMode
,
3544 VGPU10_INTERPOLATION_MODE interpMode
,
3545 boolean addSignature
,
3546 SVGA3dDXSignatureSemanticName sgnName
)
3548 VGPU10OpcodeToken0 opcode0
;
3549 VGPU10OperandToken0 operand0
;
3550 VGPU10NameToken name_token
;
3552 assert(usageMask
<= VGPU10_OPERAND_4_COMPONENT_MASK_ALL
);
3553 assert(opcodeType
== VGPU10_OPCODE_DCL_INPUT
||
3554 opcodeType
== VGPU10_OPCODE_DCL_INPUT_SIV
||
3555 opcodeType
== VGPU10_OPCODE_DCL_INPUT_SGV
||
3556 opcodeType
== VGPU10_OPCODE_DCL_INPUT_PS
||
3557 opcodeType
== VGPU10_OPCODE_DCL_INPUT_PS_SIV
||
3558 opcodeType
== VGPU10_OPCODE_DCL_INPUT_PS_SGV
);
3559 assert(operandType
== VGPU10_OPERAND_TYPE_INPUT
||
3560 operandType
== VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID
||
3561 operandType
== VGPU10_OPERAND_TYPE_INPUT_COVERAGE_MASK
||
3562 operandType
== VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID
||
3563 operandType
== VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID
||
3564 operandType
== VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT
||
3565 operandType
== VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT
||
3566 operandType
== VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT
||
3567 operandType
== VGPU10_OPERAND_TYPE_INPUT_THREAD_ID
||
3568 operandType
== VGPU10_OPERAND_TYPE_INPUT_THREAD_GROUP_ID
||
3569 operandType
== VGPU10_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP
);
3571 assert(numComp
<= VGPU10_OPERAND_4_COMPONENT
);
3572 assert(selMode
<= VGPU10_OPERAND_4_COMPONENT_MASK_MODE
);
3573 assert(dim
<= VGPU10_OPERAND_INDEX_3D
);
3574 assert(name
== VGPU10_NAME_UNDEFINED
||
3575 name
== VGPU10_NAME_POSITION
||
3576 name
== VGPU10_NAME_INSTANCE_ID
||
3577 name
== VGPU10_NAME_VERTEX_ID
||
3578 name
== VGPU10_NAME_PRIMITIVE_ID
||
3579 name
== VGPU10_NAME_IS_FRONT_FACE
||
3580 name
== VGPU10_NAME_SAMPLE_INDEX
||
3581 name
== VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX
||
3582 name
== VGPU10_NAME_VIEWPORT_ARRAY_INDEX
);
3584 assert(interpMode
== VGPU10_INTERPOLATION_UNDEFINED
||
3585 interpMode
== VGPU10_INTERPOLATION_CONSTANT
||
3586 interpMode
== VGPU10_INTERPOLATION_LINEAR
||
3587 interpMode
== VGPU10_INTERPOLATION_LINEAR_CENTROID
||
3588 interpMode
== VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE
||
3589 interpMode
== VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID
||
3590 interpMode
== VGPU10_INTERPOLATION_LINEAR_SAMPLE
||
3591 interpMode
== VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE
);
3593 check_register_index(emit
, opcodeType
, index
);
3595 opcode0
.value
= operand0
.value
= name_token
.value
= 0;
3597 opcode0
.opcodeType
= opcodeType
;
3598 opcode0
.interpolationMode
= interpMode
;
3600 operand0
.operandType
= operandType
;
3601 operand0
.numComponents
= numComp
;
3602 operand0
.selectionMode
= selMode
;
3603 operand0
.mask
= usageMask
;
3604 operand0
.indexDimension
= dim
;
3605 operand0
.index0Representation
= VGPU10_OPERAND_INDEX_IMMEDIATE32
;
3606 if (dim
== VGPU10_OPERAND_INDEX_2D
)
3607 operand0
.index1Representation
= VGPU10_OPERAND_INDEX_IMMEDIATE32
;
3609 name_token
.name
= name
;
3611 emit_decl_instruction(emit
, opcode0
, operand0
, name_token
, index
, size
);
3614 struct svga_shader_signature
*sgn
= &emit
->signature
;
3615 if (operandType
== VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT
) {
3616 /* Set patch constant signature */
3617 SVGA3dDXShaderSignatureEntry
*sgnEntry
=
3618 &sgn
->patchConstants
[sgn
->header
.numPatchConstantSignatures
++];
3619 set_shader_signature_entry(sgnEntry
, index
,
3621 SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN
,
3622 SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT
);
3624 } else if (operandType
== VGPU10_OPERAND_TYPE_INPUT
||
3625 operandType
== VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT
) {
3626 /* Set input signature */
3627 SVGA3dDXShaderSignatureEntry
*sgnEntry
=
3628 &sgn
->inputs
[sgn
->header
.numInputSignatures
++];
3629 set_shader_signature_entry(sgnEntry
, index
,
3631 SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN
,
3632 SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT
);
3636 if (emit
->index_range
.required
) {
3637 /* Here, index_range declaration is only applicable for opcodeType
3638 * VGPU10_OPCODE_DCL_INPUT and VGPU10_OPCODE_DCL_INPUT_PS and
3639 * for operandType VGPU10_OPERAND_TYPE_INPUT,
3640 * VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT and
3641 * VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT.
3643 if ((opcodeType
!= VGPU10_OPCODE_DCL_INPUT
&&
3644 opcodeType
!= VGPU10_OPCODE_DCL_INPUT_PS
) ||
3645 (operandType
!= VGPU10_OPERAND_TYPE_INPUT
&&
3646 operandType
!= VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT
&&
3647 operandType
!= VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT
)) {
3648 if (emit
->index_range
.start_index
!= INVALID_INDEX
) {
3649 emit_index_range_declaration(emit
);
3654 if (emit
->index_range
.operandType
== VGPU10_NUM_OPERANDS
) {
3655 /* Need record new index_range */
3656 emit
->index_range
.count
= 1;
3657 emit
->index_range
.operandType
= operandType
;
3658 emit
->index_range
.start_index
= index
;
3659 emit
->index_range
.size
= size
;
3660 emit
->index_range
.dim
= dim
;
3663 (emit
->index_range
.start_index
+ emit
->index_range
.count
) ||
3664 emit
->index_range
.operandType
!= operandType
) {
3665 /* Input index is not contiguous with index range or operandType is
3666 * different from index range's operandType. We need to emit current
3667 * index_range first and then start recording next index range.
3669 emit_index_range_declaration(emit
);
3671 emit
->index_range
.count
= 1;
3672 emit
->index_range
.operandType
= operandType
;
3673 emit
->index_range
.start_index
= index
;
3674 emit
->index_range
.size
= size
;
3675 emit
->index_range
.dim
= dim
;
3677 else if (emit
->index_range
.operandType
== operandType
) {
3678 /* Since input index is contiguous with index range and operandType
3679 * is same as index range's operandType, increment index range count.
3681 emit
->index_range
.count
++;
3688 * Emit the declaration for a shader output.
3689 * \param type one of VGPU10_OPCODE_DCL_OUTPUTx
3690 * \param index the output register index
3691 * \param name one of VGPU10_NAME_x
3692 * \param usageMask bitfield of VGPU10_OPERAND_4_COMPONENT_MASK_x values
3695 emit_output_declaration(struct svga_shader_emitter_v10
*emit
,
3696 VGPU10_OPCODE_TYPE type
, unsigned index
,
3697 VGPU10_SYSTEM_NAME name
,
3699 boolean addSignature
,
3700 SVGA3dDXSignatureSemanticName sgnName
)
3702 VGPU10OpcodeToken0 opcode0
;
3703 VGPU10OperandToken0 operand0
;
3704 VGPU10NameToken name_token
;
3706 assert(writemask
<= VGPU10_OPERAND_4_COMPONENT_MASK_ALL
);
3707 assert(type
== VGPU10_OPCODE_DCL_OUTPUT
||
3708 type
== VGPU10_OPCODE_DCL_OUTPUT_SGV
||
3709 type
== VGPU10_OPCODE_DCL_OUTPUT_SIV
);
3710 assert(name
== VGPU10_NAME_UNDEFINED
||
3711 name
== VGPU10_NAME_POSITION
||
3712 name
== VGPU10_NAME_PRIMITIVE_ID
||
3713 name
== VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX
||
3714 name
== VGPU10_NAME_VIEWPORT_ARRAY_INDEX
||
3715 name
== VGPU10_NAME_CLIP_DISTANCE
);
3717 check_register_index(emit
, type
, index
);
3719 opcode0
.value
= operand0
.value
= name_token
.value
= 0;
3721 opcode0
.opcodeType
= type
;
3722 operand0
.operandType
= VGPU10_OPERAND_TYPE_OUTPUT
;
3723 operand0
.numComponents
= VGPU10_OPERAND_4_COMPONENT
;
3724 operand0
.selectionMode
= VGPU10_OPERAND_4_COMPONENT_MASK_MODE
;
3725 operand0
.mask
= writemask
;
3726 operand0
.indexDimension
= VGPU10_OPERAND_INDEX_1D
;
3727 operand0
.index0Representation
= VGPU10_OPERAND_INDEX_IMMEDIATE32
;
3729 name_token
.name
= name
;
3731 emit_decl_instruction(emit
, opcode0
, operand0
, name_token
, index
, 1);
3733 /* Capture output signature */
3735 struct svga_shader_signature
*sgn
= &emit
->signature
;
3736 SVGA3dDXShaderSignatureEntry
*sgnEntry
=
3737 &sgn
->outputs
[sgn
->header
.numOutputSignatures
++];
3738 set_shader_signature_entry(sgnEntry
, index
,
3740 SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN
,
3741 SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT
);
3744 if (emit
->index_range
.required
) {
3745 /* Here, index_range declaration is only applicable for opcodeType
3746 * VGPU10_OPCODE_DCL_OUTPUT and for operandType
3747 * VGPU10_OPERAND_TYPE_OUTPUT.
3749 if (type
!= VGPU10_OPCODE_DCL_OUTPUT
) {
3750 if (emit
->index_range
.start_index
!= INVALID_INDEX
) {
3751 emit_index_range_declaration(emit
);
3756 if (emit
->index_range
.operandType
== VGPU10_NUM_OPERANDS
) {
3757 /* Need record new index_range */
3758 emit
->index_range
.count
= 1;
3759 emit
->index_range
.operandType
= VGPU10_OPERAND_TYPE_OUTPUT
;
3760 emit
->index_range
.start_index
= index
;
3761 emit
->index_range
.size
= 1;
3762 emit
->index_range
.dim
= VGPU10_OPERAND_INDEX_1D
;
3765 (emit
->index_range
.start_index
+ emit
->index_range
.count
)) {
3766 /* Output index is not contiguous with index range. We need to
3767 * emit current index_range first and then start recording next
3770 emit_index_range_declaration(emit
);
3772 emit
->index_range
.count
= 1;
3773 emit
->index_range
.operandType
= VGPU10_OPERAND_TYPE_OUTPUT
;
3774 emit
->index_range
.start_index
= index
;
3775 emit
->index_range
.size
= 1;
3776 emit
->index_range
.dim
= VGPU10_OPERAND_INDEX_1D
;
3779 /* Since output index is contiguous with index range, increment
3780 * index range count.
3782 emit
->index_range
.count
++;
3789 * Emit the declaration for the fragment depth output.
3792 emit_fragdepth_output_declaration(struct svga_shader_emitter_v10
*emit
)
3794 VGPU10OpcodeToken0 opcode0
;
3795 VGPU10OperandToken0 operand0
;
3796 VGPU10NameToken name_token
;
3798 assert(emit
->unit
== PIPE_SHADER_FRAGMENT
);
3800 opcode0
.value
= operand0
.value
= name_token
.value
= 0;
3802 opcode0
.opcodeType
= VGPU10_OPCODE_DCL_OUTPUT
;
3803 operand0
.operandType
= VGPU10_OPERAND_TYPE_OUTPUT_DEPTH
;
3804 operand0
.numComponents
= VGPU10_OPERAND_1_COMPONENT
;
3805 operand0
.indexDimension
= VGPU10_OPERAND_INDEX_0D
;
3808 emit_decl_instruction(emit
, opcode0
, operand0
, name_token
, 0, 1);
3813 * Emit the declaration for the fragment sample mask/coverage output.
3816 emit_samplemask_output_declaration(struct svga_shader_emitter_v10
*emit
)
3818 VGPU10OpcodeToken0 opcode0
;
3819 VGPU10OperandToken0 operand0
;
3820 VGPU10NameToken name_token
;
3822 assert(emit
->unit
== PIPE_SHADER_FRAGMENT
);
3823 assert(emit
->version
>= 41);
3825 opcode0
.value
= operand0
.value
= name_token
.value
= 0;
3827 opcode0
.opcodeType
= VGPU10_OPCODE_DCL_OUTPUT
;
3828 operand0
.operandType
= VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK
;
3829 operand0
.numComponents
= VGPU10_OPERAND_0_COMPONENT
;
3830 operand0
.indexDimension
= VGPU10_OPERAND_INDEX_0D
;
3833 emit_decl_instruction(emit
, opcode0
, operand0
, name_token
, 0, 1);
3838 * Emit output declarations for fragment shader.
3841 emit_fs_output_declarations(struct svga_shader_emitter_v10
*emit
)
3845 for (i
= 0; i
< emit
->info
.num_outputs
; i
++) {
3846 /*const unsigned usage_mask = emit->info.output_usage_mask[i];*/
3847 const enum tgsi_semantic semantic_name
=
3848 emit
->info
.output_semantic_name
[i
];
3849 const unsigned semantic_index
= emit
->info
.output_semantic_index
[i
];
3852 if (semantic_name
== TGSI_SEMANTIC_COLOR
) {
3853 assert(semantic_index
< ARRAY_SIZE(emit
->fs
.color_out_index
));
3855 emit
->fs
.color_out_index
[semantic_index
] = index
;
3857 emit
->fs
.num_color_outputs
= MAX2(emit
->fs
.num_color_outputs
,
3860 /* The semantic index is the shader's color output/buffer index */
3861 emit_output_declaration(emit
,
3862 VGPU10_OPCODE_DCL_OUTPUT
, semantic_index
,
3863 VGPU10_NAME_UNDEFINED
,
3864 VGPU10_OPERAND_4_COMPONENT_MASK_ALL
,
3866 map_tgsi_semantic_to_sgn_name(semantic_name
));
3868 if (semantic_index
== 0) {
3869 if (emit
->key
.fs
.write_color0_to_n_cbufs
> 1) {
3870 /* Emit declarations for the additional color outputs
3874 for (j
= 1; j
< emit
->key
.fs
.write_color0_to_n_cbufs
; j
++) {
3875 /* Allocate a new output index */
3876 unsigned idx
= emit
->info
.num_outputs
+ j
- 1;
3877 emit
->fs
.color_out_index
[j
] = idx
;
3878 emit_output_declaration(emit
,
3879 VGPU10_OPCODE_DCL_OUTPUT
, idx
,
3880 VGPU10_NAME_UNDEFINED
,
3881 VGPU10_OPERAND_4_COMPONENT_MASK_ALL
,
3883 map_tgsi_semantic_to_sgn_name(semantic_name
));
3884 emit
->info
.output_semantic_index
[idx
] = j
;
3887 emit
->fs
.num_color_outputs
=
3888 emit
->key
.fs
.write_color0_to_n_cbufs
;
3892 else if (semantic_name
== TGSI_SEMANTIC_POSITION
) {
3893 /* Fragment depth output */
3894 emit_fragdepth_output_declaration(emit
);
3896 else if (semantic_name
== TGSI_SEMANTIC_SAMPLEMASK
) {
3897 /* Sample mask output */
3898 emit_samplemask_output_declaration(emit
);
3901 assert(!"Bad output semantic name");
3908 * Emit common output declaration for vertex processing.
3911 emit_vertex_output_declaration(struct svga_shader_emitter_v10
*emit
,
3912 unsigned index
, unsigned writemask
,
3913 boolean addSignature
)
3915 const enum tgsi_semantic semantic_name
=
3916 emit
->info
.output_semantic_name
[index
];
3917 const unsigned semantic_index
= emit
->info
.output_semantic_index
[index
];
3918 unsigned name
, type
;
3919 unsigned final_mask
= VGPU10_OPERAND_4_COMPONENT_MASK_ALL
;
3921 assert(emit
->unit
!= PIPE_SHADER_FRAGMENT
&&
3922 emit
->unit
!= PIPE_SHADER_COMPUTE
);
3924 switch (semantic_name
) {
3925 case TGSI_SEMANTIC_POSITION
:
3926 if (emit
->unit
== PIPE_SHADER_TESS_CTRL
) {
3927 /* position will be declared in control point only */
3928 assert(emit
->tcs
.control_point_phase
);
3929 type
= VGPU10_OPCODE_DCL_OUTPUT
;
3930 name
= VGPU10_NAME_UNDEFINED
;
3931 emit_output_declaration(emit
, type
, index
, name
, final_mask
, TRUE
,
3932 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED
);
3936 type
= VGPU10_OPCODE_DCL_OUTPUT_SIV
;
3937 name
= VGPU10_NAME_POSITION
;
3939 /* Save the index of the vertex position output register */
3940 emit
->vposition
.out_index
= index
;
3942 case TGSI_SEMANTIC_CLIPDIST
:
3943 type
= VGPU10_OPCODE_DCL_OUTPUT_SIV
;
3944 name
= VGPU10_NAME_CLIP_DISTANCE
;
3945 /* save the starting index of the clip distance output register */
3946 if (semantic_index
== 0)
3947 emit
->clip_dist_out_index
= index
;
3948 final_mask
= apply_clip_plane_mask(emit
, writemask
, semantic_index
);
3949 if (final_mask
== 0x0)
3950 return; /* discard this do-nothing declaration */
3952 case TGSI_SEMANTIC_CLIPVERTEX
:
3953 type
= VGPU10_OPCODE_DCL_OUTPUT
;
3954 name
= VGPU10_NAME_UNDEFINED
;
3955 emit
->clip_vertex_out_index
= index
;
3958 /* generic output */
3959 type
= VGPU10_OPCODE_DCL_OUTPUT
;
3960 name
= VGPU10_NAME_UNDEFINED
;
3963 emit_output_declaration(emit
, type
, index
, name
, final_mask
, addSignature
,
3964 map_tgsi_semantic_to_sgn_name(semantic_name
));
3969 * Emit declaration for outputs in vertex shader.
3972 emit_vs_output_declarations(struct svga_shader_emitter_v10
*emit
)
3975 for (i
= 0; i
< emit
->info
.num_outputs
; i
++) {
3976 emit_vertex_output_declaration(emit
, i
, emit
->output_usage_mask
[i
], TRUE
);
3982 * A helper function to determine the writemask for an output
3983 * for the specified stream.
3986 output_writemask_for_stream(unsigned stream
, ubyte output_streams
,
3987 ubyte output_usagemask
)
3990 unsigned writemask
= 0;
3992 for (i
= 0; i
< 4; i
++) {
3993 if ((output_streams
& 0x3) == stream
)
3994 writemask
|= (VGPU10_OPERAND_4_COMPONENT_MASK_X
<< i
);
3995 output_streams
>>= 2;
3997 return writemask
& output_usagemask
;
4002 * Emit declaration for outputs in geometry shader.
4005 emit_gs_output_declarations(struct svga_shader_emitter_v10
*emit
)
4008 VGPU10OpcodeToken0 opcode0
;
4009 unsigned numStreamsSupported
= 1;
4012 if (emit
->version
>= 50) {
4013 numStreamsSupported
= ARRAY_SIZE(emit
->info
.num_stream_output_components
);
4017 * Start emitting from the last stream first, so we end with
4018 * stream 0, so any of the auxiliary output declarations will
4021 for (s
= numStreamsSupported
-1; s
>= 0; s
--) {
4023 if (emit
->info
.num_stream_output_components
[s
] == 0)
4026 if (emit
->version
>= 50) {
4027 /* DCL_STREAM stream */
4028 begin_emit_instruction(emit
);
4029 emit_opcode(emit
, VGPU10_OPCODE_DCL_STREAM
, FALSE
);
4030 emit_stream_register(emit
, s
);
4031 end_emit_instruction(emit
);
4034 /* emit output primitive topology declaration */
4036 opcode0
.opcodeType
= VGPU10_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY
;
4037 opcode0
.primitiveTopology
= emit
->gs
.prim_topology
;
4038 emit_property_instruction(emit
, opcode0
, 0, 0);
4040 for (i
= 0; i
< emit
->info
.num_outputs
; i
++) {
4043 /* find out the writemask for this stream */
4044 writemask
= output_writemask_for_stream(s
, emit
->info
.output_streams
[i
],
4045 emit
->output_usage_mask
[i
]);
4048 enum tgsi_semantic semantic_name
=
4049 emit
->info
.output_semantic_name
[i
];
4051 /* TODO: Still need to take care of a special case where a
4052 * single varying spans across multiple output registers.
4054 switch(semantic_name
) {
4055 case TGSI_SEMANTIC_PRIMID
:
4056 emit_output_declaration(emit
,
4057 VGPU10_OPCODE_DCL_OUTPUT_SGV
, i
,
4058 VGPU10_NAME_PRIMITIVE_ID
,
4059 VGPU10_OPERAND_4_COMPONENT_MASK_ALL
,
4061 map_tgsi_semantic_to_sgn_name(semantic_name
));
4063 case TGSI_SEMANTIC_LAYER
:
4064 emit_output_declaration(emit
,
4065 VGPU10_OPCODE_DCL_OUTPUT_SIV
, i
,
4066 VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX
,
4067 VGPU10_OPERAND_4_COMPONENT_MASK_X
,
4069 map_tgsi_semantic_to_sgn_name(semantic_name
));
4071 case TGSI_SEMANTIC_VIEWPORT_INDEX
:
4072 emit_output_declaration(emit
,
4073 VGPU10_OPCODE_DCL_OUTPUT_SIV
, i
,
4074 VGPU10_NAME_VIEWPORT_ARRAY_INDEX
,
4075 VGPU10_OPERAND_4_COMPONENT_MASK_X
,
4077 map_tgsi_semantic_to_sgn_name(semantic_name
));
4078 emit
->gs
.viewport_index_out_index
= i
;
4081 emit_vertex_output_declaration(emit
, i
, writemask
, FALSE
);
4087 /* For geometry shader outputs, it is possible the same register is
4088 * declared multiple times for different streams. So to avoid
4089 * redundant signature entries, geometry shader output signature is done
4090 * outside of the declaration.
4092 struct svga_shader_signature
*sgn
= &emit
->signature
;
4093 SVGA3dDXShaderSignatureEntry
*sgnEntry
;
4095 for (i
= 0; i
< emit
->info
.num_outputs
; i
++) {
4096 if (emit
->output_usage_mask
[i
]) {
4097 enum tgsi_semantic sem_name
= emit
->info
.output_semantic_name
[i
];
4099 sgnEntry
= &sgn
->outputs
[sgn
->header
.numOutputSignatures
++];
4100 set_shader_signature_entry(sgnEntry
, i
,
4101 map_tgsi_semantic_to_sgn_name(sem_name
),
4102 emit
->output_usage_mask
[i
],
4103 SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN
,
4104 SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT
);
4111 * Emit the declaration for the tess inner/outer output.
4112 * \param opcodeType either VGPU10_OPCODE_DCL_OUTPUT_SIV or _INPUT_SIV
4113 * \param operandType either VGPU10_OPERAND_TYPE_OUTPUT or _INPUT
4114 * \param name VGPU10_NAME_FINAL_*_TESSFACTOR value
4117 emit_tesslevel_declaration(struct svga_shader_emitter_v10
*emit
,
4118 unsigned index
, unsigned opcodeType
,
4119 unsigned operandType
, VGPU10_SYSTEM_NAME name
,
4120 SVGA3dDXSignatureSemanticName sgnName
)
4122 VGPU10OpcodeToken0 opcode0
;
4123 VGPU10OperandToken0 operand0
;
4124 VGPU10NameToken name_token
;
4126 assert(emit
->version
>= 50);
4127 assert(name
>= VGPU10_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR
||
4128 (emit
->key
.tcs
.prim_mode
== PIPE_PRIM_LINES
&&
4129 name
== VGPU10_NAME_UNDEFINED
));
4130 assert(name
<= VGPU10_NAME_FINAL_LINE_DENSITY_TESSFACTOR
);
4132 assert(operandType
== VGPU10_OPERAND_TYPE_OUTPUT
||
4133 operandType
== VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT
);
4135 opcode0
.value
= operand0
.value
= name_token
.value
= 0;
4137 opcode0
.opcodeType
= opcodeType
;
4138 operand0
.operandType
= operandType
;
4139 operand0
.numComponents
= VGPU10_OPERAND_4_COMPONENT
;
4140 operand0
.indexDimension
= VGPU10_OPERAND_INDEX_1D
;
4141 operand0
.mask
= VGPU10_OPERAND_4_COMPONENT_MASK_X
;
4142 operand0
.selectionMode
= VGPU10_OPERAND_4_COMPONENT_MASK_MODE
;
4143 operand0
.index0Representation
= VGPU10_OPERAND_INDEX_IMMEDIATE32
;
4145 name_token
.name
= name
;
4146 emit_decl_instruction(emit
, opcode0
, operand0
, name_token
, index
, 1);
4148 /* Capture patch constant signature */
4149 struct svga_shader_signature
*sgn
= &emit
->signature
;
4150 SVGA3dDXShaderSignatureEntry
*sgnEntry
=
4151 &sgn
->patchConstants
[sgn
->header
.numPatchConstantSignatures
++];
4152 set_shader_signature_entry(sgnEntry
, index
,
4153 sgnName
, VGPU10_OPERAND_4_COMPONENT_MASK_X
,
4154 SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN
,
4155 SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT
);
4160 * Emit output declarations for tessellation control shader.
// emit_tcs_output_declarations: declare the output registers of a
// tessellation control shader.
//
// Walks emit->info.output_semantic_name[] and handles each output by its
// TGSI semantic:
//   TESSINNER / TESSOUTER - records the TGSI index, then declares one
//     DCL_OUTPUT_SIV register per tess factor through
//     emit_tesslevel_declaration(), selected by emit->key.tcs.prim_mode.
//   PATCH - records the first per-patch output index, counts the outputs,
//     declares the output and appends a patch constant signature entry.
//   other outputs - counted as control point outputs and declared with
//     emit_vertex_output_declaration().
// After the loop, a position output and tess factor registers are declared
// for the cases where the shader did not declare them itself, and
// emit->num_outputs is set to the next free output register.
//
// NOTE(review): this view of the function is missing lines (the
// declarations of i and index, break/default statements, closing braces
// and some trailing call arguments) - confirm details against the complete
// source before relying on this summary.
4163 emit_tcs_output_declarations(struct svga_shader_emitter_v10
*emit
)
4166 unsigned outputIndex
= emit
->num_outputs
;
4167 struct svga_shader_signature
*sgn
= &emit
->signature
;
4170 * Initialize patch_generic_out_count so it won't be counted twice
4171 * since this function is called twice, one for control point phase
4172 * and another time for patch constant phase.
4174 emit
->tcs
.patch_generic_out_count
= 0;
// Classify each shader output by its TGSI semantic name.
4176 for (i
= 0; i
< emit
->info
.num_outputs
; i
++) {
4178 const enum tgsi_semantic semantic_name
=
4179 emit
->info
.output_semantic_name
[i
];
4181 switch (semantic_name
) {
4182 case TGSI_SEMANTIC_TESSINNER
:
4183 emit
->tcs
.inner
.tgsi_index
= i
;
4185 /* skip per-patch output declarations in control point phase */
4186 if (emit
->tcs
.control_point_phase
)
4189 emit
->tcs
.inner
.out_index
= outputIndex
;
// Inner factors: two registers (U and V) for quads, one for triangles.
4190 switch (emit
->key
.tcs
.prim_mode
) {
4191 case PIPE_PRIM_QUADS
:
4192 emit_tesslevel_declaration(emit
, outputIndex
++,
4193 VGPU10_OPCODE_DCL_OUTPUT_SIV
, VGPU10_OPERAND_TYPE_OUTPUT
,
4194 VGPU10_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR
,
4195 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR
);
4197 emit_tesslevel_declaration(emit
, outputIndex
++,
4198 VGPU10_OPCODE_DCL_OUTPUT_SIV
, VGPU10_OPERAND_TYPE_OUTPUT
,
4199 VGPU10_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR
,
4200 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR
);
4202 case PIPE_PRIM_TRIANGLES
:
4203 emit_tesslevel_declaration(emit
, outputIndex
++,
4204 VGPU10_OPCODE_DCL_OUTPUT_SIV
, VGPU10_OPERAND_TYPE_OUTPUT
,
4205 VGPU10_NAME_FINAL_TRI_INSIDE_TESSFACTOR
,
4206 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_INSIDE_TESSFACTOR
);
4208 case PIPE_PRIM_LINES
:
4211 debug_printf("Unsupported primitive type");
4215 case TGSI_SEMANTIC_TESSOUTER
:
4216 emit
->tcs
.outer
.tgsi_index
= i
;
4218 /* skip per-patch output declarations in control point phase */
4219 if (emit
->tcs
.control_point_phase
)
4222 emit
->tcs
.outer
.out_index
= outputIndex
;
// Outer factors: 4 registers for quads, 3 for triangles, 2 for lines.
// The system name and the signature name are both offset by the loop
// counter; presumably the enum values are consecutive - confirm against
// the enum definitions.
4223 switch (emit
->key
.tcs
.prim_mode
) {
4224 case PIPE_PRIM_QUADS
:
4225 for (int j
= 0; j
< 4; j
++) {
4226 emit_tesslevel_declaration(emit
, outputIndex
++,
4227 VGPU10_OPCODE_DCL_OUTPUT_SIV
, VGPU10_OPERAND_TYPE_OUTPUT
,
4228 VGPU10_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR
+ j
,
4229 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR
+ j
);
4232 case PIPE_PRIM_TRIANGLES
:
4233 for (int j
= 0; j
< 3; j
++) {
4234 emit_tesslevel_declaration(emit
, outputIndex
++,
4235 VGPU10_OPCODE_DCL_OUTPUT_SIV
, VGPU10_OPERAND_TYPE_OUTPUT
,
4236 VGPU10_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR
+ j
,
4237 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR
+ j
);
4240 case PIPE_PRIM_LINES
:
4241 for (int j
= 0; j
< 2; j
++) {
4242 emit_tesslevel_declaration(emit
, outputIndex
++,
4243 VGPU10_OPCODE_DCL_OUTPUT_SIV
, VGPU10_OPERAND_TYPE_OUTPUT
,
4244 VGPU10_NAME_FINAL_LINE_DETAIL_TESSFACTOR
+ j
,
4245 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DETAIL_TESSFACTOR
+ j
);
4249 debug_printf("Unsupported primitive type");
// Per-patch generic output: remember the first one seen and count them.
4253 case TGSI_SEMANTIC_PATCH
:
4254 if (emit
->tcs
.patch_generic_out_index
== INVALID_INDEX
)
4255 emit
->tcs
.patch_generic_out_index
= i
;
4256 emit
->tcs
.patch_generic_out_count
++;
4258 /* skip per-patch output declarations in control point phase */
4259 if (emit
->tcs
.control_point_phase
)
4262 emit_output_declaration(emit
, VGPU10_OPCODE_DCL_OUTPUT
, index
,
4263 VGPU10_NAME_UNDEFINED
,
4264 VGPU10_OPERAND_4_COMPONENT_MASK_ALL
,
4266 map_tgsi_semantic_to_sgn_name(semantic_name
));
4268 SVGA3dDXShaderSignatureEntry
*sgnEntry
=
4269 &sgn
->patchConstants
[sgn
->header
.numPatchConstantSignatures
++];
4270 set_shader_signature_entry(sgnEntry
, index
,
4271 map_tgsi_semantic_to_sgn_name(semantic_name
),
4272 VGPU10_OPERAND_4_COMPONENT_MASK_ALL
,
4273 SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN
,
4274 SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT
);
// NOTE(review): presumably the default case of the switch (the label is
// not visible in this view); these outputs are counted as control point
// outputs.
4279 /* save the starting index of control point outputs */
4280 if (emit
->tcs
.control_point_out_index
== INVALID_INDEX
)
4281 emit
->tcs
.control_point_out_index
= i
;
4282 emit
->tcs
.control_point_out_count
++;
4284 /* skip control point output declarations in patch constant phase */
4285 if (!emit
->tcs
.control_point_phase
)
4288 emit_vertex_output_declaration(emit
, i
, emit
->output_usage_mask
[i
],
4294 if (emit
->tcs
.control_point_phase
) {
4296 * Add missing control point output in control point phase.
4298 if (emit
->tcs
.control_point_out_index
== INVALID_INDEX
) {
4299 /* use register index after tessellation factors */
// Offset past the tess factor registers: +6 for quads, +4 for triangles,
// +2 in the remaining branch.
4300 switch (emit
->key
.tcs
.prim_mode
) {
4301 case PIPE_PRIM_QUADS
:
4302 emit
->tcs
.control_point_out_index
= outputIndex
+ 6;
4304 case PIPE_PRIM_TRIANGLES
:
4305 emit
->tcs
.control_point_out_index
= outputIndex
+ 4;
4308 emit
->tcs
.control_point_out_index
= outputIndex
+ 2;
4311 emit
->tcs
.control_point_out_count
++;
4312 emit_output_declaration(emit
, VGPU10_OPCODE_DCL_OUTPUT_SIV
,
4313 emit
->tcs
.control_point_out_index
,
4314 VGPU10_NAME_POSITION
,
4315 VGPU10_OPERAND_4_COMPONENT_MASK_ALL
,
4317 SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION
);
4319 /* If tcs does not output any control point output,
4320 * we can end the hull shader control point phase here
4321 * after emitting the default control point output.
4323 emit
->skip_instruction
= TRUE
;
4327 if (emit
->tcs
.outer
.out_index
== INVALID_INDEX
) {
4328 /* since the TCS did not declare out outer tess level output register,
4329 * we declare it here for patch constant phase only.
4331 emit
->tcs
.outer
.out_index
= outputIndex
;
4332 if (emit
->key
.tcs
.prim_mode
== PIPE_PRIM_QUADS
) {
4333 for (int i
= 0; i
< 4; i
++) {
4334 emit_tesslevel_declaration(emit
, outputIndex
++,
4335 VGPU10_OPCODE_DCL_OUTPUT_SIV
, VGPU10_OPERAND_TYPE_OUTPUT
,
4336 VGPU10_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR
+ i
,
4337 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR
+ i
);
4340 else if (emit
->key
.tcs
.prim_mode
== PIPE_PRIM_TRIANGLES
) {
4341 for (int i
= 0; i
< 3; i
++) {
4342 emit_tesslevel_declaration(emit
, outputIndex
++,
4343 VGPU10_OPCODE_DCL_OUTPUT_SIV
, VGPU10_OPERAND_TYPE_OUTPUT
,
4344 VGPU10_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR
+ i
,
4345 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR
+ i
);
4350 if (emit
->tcs
.inner
.out_index
== INVALID_INDEX
) {
4351 /* since the TCS did not declare out inner tess level output register,
4352 * we declare it here
4354 emit
->tcs
.inner
.out_index
= outputIndex
;
4355 if (emit
->key
.tcs
.prim_mode
== PIPE_PRIM_QUADS
) {
4356 emit_tesslevel_declaration(emit
, outputIndex
++,
4357 VGPU10_OPCODE_DCL_OUTPUT_SIV
, VGPU10_OPERAND_TYPE_OUTPUT
,
4358 VGPU10_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR
,
4359 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR
);
4360 emit_tesslevel_declaration(emit
, outputIndex
++,
4361 VGPU10_OPCODE_DCL_OUTPUT_SIV
, VGPU10_OPERAND_TYPE_OUTPUT
,
4362 VGPU10_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR
,
4363 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR
);
4365 else if (emit
->key
.tcs
.prim_mode
== PIPE_PRIM_TRIANGLES
) {
4366 emit_tesslevel_declaration(emit
, outputIndex
++,
4367 VGPU10_OPCODE_DCL_OUTPUT_SIV
, VGPU10_OPERAND_TYPE_OUTPUT
,
4368 VGPU10_NAME_FINAL_TRI_INSIDE_TESSFACTOR
,
4369 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_INSIDE_TESSFACTOR
);
// Publish the next free output register index.
4373 emit
->num_outputs
= outputIndex
;
4378 * Emit output declarations for tessellation evaluation shader.
// emit_tes_output_declarations: declare every output of a tessellation
// evaluation shader. Each of the emit->info.num_outputs registers is passed
// to emit_vertex_output_declaration() together with its entry in
// emit->output_usage_mask[] and TRUE as the final argument.
// NOTE(review): the declaration of i and the enclosing braces are not
// visible in this view of the function.
4381 emit_tes_output_declarations(struct svga_shader_emitter_v10
*emit
)
4385 for (i
= 0; i
< emit
->info
.num_outputs
; i
++) {
4386 emit_vertex_output_declaration(emit
, i
, emit
->output_usage_mask
[i
], TRUE
);
4392 * Emit the declaration for a system value input/output.
// emit_system_value_declaration: handle the declaration of one TGSI system
// value.
//
// emit          - shader emitter state; per-stage index fields are updated.
// semantic_name - TGSI semantic of the system value.
// index         - TGSI system value register index.
//
// Behaviour by semantic, as far as this view shows:
//   INSTANCEID   - index is remapped with alloc_system_value_index() and an
//                  input is declared with the X component mask.
//   VERTEXID, SAMPLEID - as INSTANCEID, after saving the incoming index in
//                  vs.vertex_id_sys_index / fs.sample_id_sys_index.
//   SAMPLEPOS    - saves the index and allocates a system value index; no
//                  declaration call is visible.
//   INVOCATIONID - geometry shader: declares the GS instance id operand;
//                  tess control shader: only saves the index.
//   SAMPLEMASK   - saves the index and declares the coverage mask input.
//   TESSCOORD    - saves the index and declares the domain point input.
//   TESSINNER, TESSOUTER, VERTICESIN - only save the index.
//   PRIMID       - tess control: saves the index; tess eval: saves the index
//                  and declares the primitive id input.
//   anything else - reported with debug_printf().
// NOTE(review): lines are missing from this view (break statements, braces
// and some call arguments); confirm against the complete source.
4395 emit_system_value_declaration(struct svga_shader_emitter_v10
*emit
,
4396 enum tgsi_semantic semantic_name
, unsigned index
)
4398 switch (semantic_name
) {
4399 case TGSI_SEMANTIC_INSTANCEID
:
4400 index
= alloc_system_value_index(emit
, index
);
4401 emit_input_declaration(emit
, VGPU10_OPCODE_DCL_INPUT_SIV
,
4402 VGPU10_OPERAND_TYPE_INPUT
,
4403 VGPU10_OPERAND_INDEX_1D
,
4405 VGPU10_NAME_INSTANCE_ID
,
4406 VGPU10_OPERAND_4_COMPONENT
,
4407 VGPU10_OPERAND_4_COMPONENT_MASK_MODE
,
4408 VGPU10_OPERAND_4_COMPONENT_MASK_X
,
4409 VGPU10_INTERPOLATION_UNDEFINED
, TRUE
,
4410 map_tgsi_semantic_to_sgn_name(semantic_name
));
4412 case TGSI_SEMANTIC_VERTEXID
:
4413 emit
->vs
.vertex_id_sys_index
= index
;
4414 index
= alloc_system_value_index(emit
, index
);
4415 emit_input_declaration(emit
, VGPU10_OPCODE_DCL_INPUT_SIV
,
4416 VGPU10_OPERAND_TYPE_INPUT
,
4417 VGPU10_OPERAND_INDEX_1D
,
4419 VGPU10_NAME_VERTEX_ID
,
4420 VGPU10_OPERAND_4_COMPONENT
,
4421 VGPU10_OPERAND_4_COMPONENT_MASK_MODE
,
4422 VGPU10_OPERAND_4_COMPONENT_MASK_X
,
4423 VGPU10_INTERPOLATION_UNDEFINED
, TRUE
,
4424 map_tgsi_semantic_to_sgn_name(semantic_name
));
4426 case TGSI_SEMANTIC_SAMPLEID
:
4427 assert(emit
->unit
== PIPE_SHADER_FRAGMENT
);
4428 emit
->fs
.sample_id_sys_index
= index
;
4429 index
= alloc_system_value_index(emit
, index
);
4430 emit_input_declaration(emit
, VGPU10_OPCODE_DCL_INPUT_PS_SIV
,
4431 VGPU10_OPERAND_TYPE_INPUT
,
4432 VGPU10_OPERAND_INDEX_1D
,
4434 VGPU10_NAME_SAMPLE_INDEX
,
4435 VGPU10_OPERAND_4_COMPONENT
,
4436 VGPU10_OPERAND_4_COMPONENT_MASK_MODE
,
4437 VGPU10_OPERAND_4_COMPONENT_MASK_X
,
4438 VGPU10_INTERPOLATION_CONSTANT
, TRUE
,
4439 map_tgsi_semantic_to_sgn_name(semantic_name
));
4441 case TGSI_SEMANTIC_SAMPLEPOS
:
4442 /* This system value contains the position of the current sample
4443 * when using per-sample shading. We implement this by calling
4444 * the VGPU10_OPCODE_SAMPLE_POS instruction with the current sample
4445 * index as the argument. See emit_sample_position_instructions().
4447 assert(emit
->version
>= 41);
4448 emit
->fs
.sample_pos_sys_index
= index
;
4449 index
= alloc_system_value_index(emit
, index
);
4451 case TGSI_SEMANTIC_INVOCATIONID
:
4452 /* Note: invocation id input is mapped to different register depending
4453 * on the shader type. In GS, it will be mapped to vGSInstanceID#.
4454 * In TCS, it will be mapped to vOutputControlPointID#.
4455 * Since in both cases, the mapped name is unique rather than
4456 * just a generic input name ("v#"), so there is no need to remap
4459 assert(emit
->unit
== PIPE_SHADER_GEOMETRY
||
4460 emit
->unit
== PIPE_SHADER_TESS_CTRL
);
4461 assert(emit
->version
>= 50);
4463 if (emit
->unit
== PIPE_SHADER_GEOMETRY
) {
4464 emit
->gs
.invocation_id_sys_index
= index
;
4465 emit_input_declaration(emit
, VGPU10_OPCODE_DCL_INPUT
,
4466 VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID
,
4467 VGPU10_OPERAND_INDEX_0D
,
4469 VGPU10_NAME_UNDEFINED
,
4470 VGPU10_OPERAND_0_COMPONENT
,
4471 VGPU10_OPERAND_4_COMPONENT_MASK_MODE
,
4473 VGPU10_INTERPOLATION_UNDEFINED
, TRUE
,
4474 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED
);
4475 } else if (emit
->unit
== PIPE_SHADER_TESS_CTRL
) {
4476 /* The emission of the control point id will be done
4477 * in the control point phase in emit_hull_shader_control_point_phase().
4479 emit
->tcs
.invocation_id_sys_index
= index
;
4482 case TGSI_SEMANTIC_SAMPLEMASK
:
4483 /* Note: the PS sample mask input has a unique name ("vCoverage#")
4484 * rather than just a generic input name ("v#") so no need to remap the
4487 assert(emit
->unit
== PIPE_SHADER_FRAGMENT
);
4488 assert(emit
->version
>= 50);
4489 emit
->fs
.sample_mask_in_sys_index
= index
;
4490 emit_input_declaration(emit
, VGPU10_OPCODE_DCL_INPUT
,
4491 VGPU10_OPERAND_TYPE_INPUT_COVERAGE_MASK
,
4492 VGPU10_OPERAND_INDEX_0D
,
4494 VGPU10_NAME_UNDEFINED
,
4495 VGPU10_OPERAND_1_COMPONENT
,
4496 VGPU10_OPERAND_4_COMPONENT_MASK_MODE
,
4498 VGPU10_INTERPOLATION_CONSTANT
, TRUE
,
4499 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED
);
4501 case TGSI_SEMANTIC_TESSCOORD
:
4502 assert(emit
->version
>= 50);
// Component mask for the domain point: XYZ for triangles, XY for lines
// and quads; it stays 0 for any other prim_mode.
4504 unsigned usageMask
= 0;
4506 if (emit
->tes
.prim_mode
== PIPE_PRIM_TRIANGLES
) {
4507 usageMask
= VGPU10_OPERAND_4_COMPONENT_MASK_XYZ
;
4509 else if (emit
->tes
.prim_mode
== PIPE_PRIM_LINES
||
4510 emit
->tes
.prim_mode
== PIPE_PRIM_QUADS
) {
4511 usageMask
= VGPU10_OPERAND_4_COMPONENT_MASK_XY
;
4514 emit
->tes
.tesscoord_sys_index
= index
;
4515 emit_input_declaration(emit
, VGPU10_OPCODE_DCL_INPUT
,
4516 VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT
,
4517 VGPU10_OPERAND_INDEX_0D
,
4519 VGPU10_NAME_UNDEFINED
,
4520 VGPU10_OPERAND_4_COMPONENT
,
4521 VGPU10_OPERAND_4_COMPONENT_MASK_MODE
,
4523 VGPU10_INTERPOLATION_UNDEFINED
, TRUE
,
4524 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED
);
4526 case TGSI_SEMANTIC_TESSINNER
:
4527 assert(emit
->version
>= 50);
4528 emit
->tes
.inner
.tgsi_index
= index
;
4530 case TGSI_SEMANTIC_TESSOUTER
:
4531 assert(emit
->version
>= 50);
4532 emit
->tes
.outer
.tgsi_index
= index
;
4534 case TGSI_SEMANTIC_VERTICESIN
:
4535 assert(emit
->unit
== PIPE_SHADER_TESS_CTRL
);
4536 assert(emit
->version
>= 50);
4538 /* save the system value index */
4539 emit
->tcs
.vertices_per_patch_index
= index
;
4541 case TGSI_SEMANTIC_PRIMID
:
4542 assert(emit
->version
>= 50);
4543 if (emit
->unit
== PIPE_SHADER_TESS_CTRL
) {
4544 emit
->tcs
.prim_id_index
= index
;
4546 else if (emit
->unit
== PIPE_SHADER_TESS_EVAL
) {
4547 emit
->tes
.prim_id_index
= index
;
4548 emit_input_declaration(emit
, VGPU10_OPCODE_DCL_INPUT
,
4549 VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID
,
4550 VGPU10_OPERAND_INDEX_0D
,
4552 VGPU10_NAME_UNDEFINED
,
4553 VGPU10_OPERAND_0_COMPONENT
,
4554 VGPU10_OPERAND_4_COMPONENT_MASK_MODE
,
4556 VGPU10_INTERPOLATION_UNDEFINED
, TRUE
,
4557 map_tgsi_semantic_to_sgn_name(semantic_name
));
// NOTE(review): the message text says "index", but the value printed is the
// semantic name enum followed by its string from tgsi_semantic_names[].
4561 debug_printf("unexpected system value semantic index %u / %s\n",
4562 semantic_name
, tgsi_semantic_names
[semantic_name
]);
4567 * Translate a TGSI declaration to VGPU10.
// emit_vgpu10_declaration: process one TGSI declaration token.
//
// emit - shader emitter state, updated with what the declaration describes.
// decl - the parsed TGSI declaration.
//
// Most register files are only recorded here so the matching VGPU10
// declarations can be emitted later:
//   INPUT        - nothing is done.
//   OUTPUT       - stores the usage mask in output_usage_mask[Range.First].
//   TEMPORARY    - registers indexed arrays through create_temp_array() and
//                  raises num_shader_temps to cover Range.Last.
//   CONSTANT     - raises num_shader_consts[constbuf], clamped to
//                  VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT with a warning.
//   SYSTEM_VALUE - forwarded to emit_system_value_declaration().
//   SAMPLER      - raises num_samplers to cover Range.Last.
//   ADDRESS      - raises num_address_regs to cover Range.Last.
//   SAMPLER_VIEW - stores the resource target and X return type for the
//                  unit and marks the unit in sampler_view[].
//   IMMEDIATE, RESOURCE and any other file hit an assert.
// NOTE(review): return statements and some call arguments are not visible
// in this view; the return type cannot be determined from here.
4570 emit_vgpu10_declaration(struct svga_shader_emitter_v10
*emit
,
4571 const struct tgsi_full_declaration
*decl
)
4573 switch (decl
->Declaration
.File
) {
4574 case TGSI_FILE_INPUT
:
4575 /* do nothing - see emit_input_declarations() */
4578 case TGSI_FILE_OUTPUT
:
4579 assert(decl
->Range
.First
== decl
->Range
.Last
);
4580 emit
->output_usage_mask
[decl
->Range
.First
] = decl
->Declaration
.UsageMask
;
4583 case TGSI_FILE_TEMPORARY
:
4584 /* Don't declare the temps here. Just keep track of how many
4585 * and emit the declaration later.
4587 if (decl
->Declaration
.Array
) {
4588 /* Indexed temporary array. Save the start index of the array
4589 * and the size of the array.
// NOTE(review): the clamp allows arrayID == MAX_TEMP_ARRAYS, which the
// assert below rejects if temp_arrays[] has MAX_TEMP_ARRAYS entries. With
// asserts compiled out that value would index one past the end - confirm
// the array size and whether the clamp should be MAX_TEMP_ARRAYS - 1.
4591 const unsigned arrayID
= MIN2(decl
->Array
.ArrayID
, MAX_TEMP_ARRAYS
);
4592 assert(arrayID
< ARRAY_SIZE(emit
->temp_arrays
));
4594 /* Save this array so we can emit the declaration for it later */
4595 create_temp_array(emit
, arrayID
, decl
->Range
.First
,
4596 decl
->Range
.Last
- decl
->Range
.First
+ 1,
4600 /* for all temps, indexed or not, keep track of highest index */
4601 emit
->num_shader_temps
= MAX2(emit
->num_shader_temps
,
4602 decl
->Range
.Last
+ 1);
4605 case TGSI_FILE_CONSTANT
:
4606 /* Don't declare constants here. Just keep track and emit later. */
// constbuf stays 0 unless the declaration carries a second dimension.
4608 unsigned constbuf
= 0, num_consts
;
4609 if (decl
->Declaration
.Dimension
) {
4610 constbuf
= decl
->Dim
.Index2D
;
4612 /* We throw an assertion here when, in fact, the shader should never
4613 * have linked due to constbuf index out of bounds, so we shouldn't
4614 * have reached here.
4616 assert(constbuf
< ARRAY_SIZE(emit
->num_shader_consts
));
4618 num_consts
= MAX2(emit
->num_shader_consts
[constbuf
],
4619 decl
->Range
.Last
+ 1);
4621 if (num_consts
> VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT
) {
4622 debug_printf("Warning: constant buffer is declared to size [%u]"
4623 " but [%u] is the limit.\n",
4625 VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT
);
4627 /* The linker doesn't enforce the max UBO size so we clamp here */
4628 emit
->num_shader_consts
[constbuf
] =
4629 MIN2(num_consts
, VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT
);
4633 case TGSI_FILE_IMMEDIATE
:
4634 assert(!"TGSI_FILE_IMMEDIATE not handled yet!");
4637 case TGSI_FILE_SYSTEM_VALUE
:
4638 emit_system_value_declaration(emit
, decl
->Semantic
.Name
,
4642 case TGSI_FILE_SAMPLER
:
4643 /* Don't declare samplers here. Just keep track and emit later. */
4644 emit
->num_samplers
= MAX2(emit
->num_samplers
, decl
->Range
.Last
+ 1);
4648 case TGSI_FILE_RESOURCE
:
4649 /*opcode0.opcodeType = VGPU10_OPCODE_DCL_RESOURCE;*/
4650 /* XXX more, VGPU10_RETURN_TYPE_FLOAT */
4651 assert(!"TGSI_FILE_RESOURCE not handled yet");
4655 case TGSI_FILE_ADDRESS
:
4656 emit
->num_address_regs
= MAX2(emit
->num_address_regs
,
4657 decl
->Range
.Last
+ 1);
4660 case TGSI_FILE_SAMPLER_VIEW
:
// NOTE(review): unit indexes sampler_target[], sampler_return_type[] and
// sampler_view[] with no bounds check visible here - confirm Range.First
// is validated before this point.
4662 unsigned unit
= decl
->Range
.First
;
4663 assert(decl
->Range
.First
== decl
->Range
.Last
);
4664 emit
->sampler_target
[unit
] = decl
->SamplerView
.Resource
;
4665 /* Note: we can ignore YZW return types for now */
4666 emit
->sampler_return_type
[unit
] = decl
->SamplerView
.ReturnTypeX
;
4667 emit
->sampler_view
[unit
] = TRUE
;
4672 assert(!"Unexpected type of declaration");
4680 * Emit input declarations for fragment shader.
// emit_fs_input_declarations: declare the inputs of a fragment shader.
//
// For each of the emit->linkage.num_inputs inputs whose usage mask is not
// zero, picks the declaration opcode, interpolation mode, system name and
// component mask from the TGSI semantic, then calls
// emit_input_declaration() for register emit->linkage.input_map[i].
//   POSITION, FACE, PRIMID, SAMPLEID, LAYER and VIEWPORT_INDEX use
//     VGPU10_OPCODE_DCL_INPUT_PS_SGV with a matching VGPU10_NAME_* value;
//     LAYER and VIEWPORT_INDEX narrow the mask to X.
//   Every other semantic uses VGPU10_OPCODE_DCL_INPUT_PS with
//     VGPU10_NAME_UNDEFINED.
// Side effects on emit->fs: fragcoord_input_index (when W of the position
// is read), face_input_index, layer_input_index (when key.fs.layer_to_zero
// is set); emit->uses_flat_interp is also updated.
// NOTE(review): lines are missing from this view (declaration of i, the
// else branches, the statement following the layer_to_zero assignment and
// two arguments of the final call); confirm against the complete source.
4683 emit_fs_input_declarations(struct svga_shader_emitter_v10
*emit
)
4687 for (i
= 0; i
< emit
->linkage
.num_inputs
; i
++) {
4688 enum tgsi_semantic semantic_name
= emit
->info
.input_semantic_name
[i
];
4689 unsigned usage_mask
= emit
->info
.input_usage_mask
[i
];
4690 unsigned index
= emit
->linkage
.input_map
[i
];
4691 unsigned type
, interpolationMode
, name
;
4692 unsigned mask
= VGPU10_OPERAND_4_COMPONENT_MASK_ALL
;
4694 if (usage_mask
== 0)
4695 continue; /* register is not actually used */
4697 if (semantic_name
== TGSI_SEMANTIC_POSITION
) {
4698 /* fragment position input */
4699 type
= VGPU10_OPCODE_DCL_INPUT_PS_SGV
;
4700 interpolationMode
= VGPU10_INTERPOLATION_LINEAR
;
4701 name
= VGPU10_NAME_POSITION
;
4702 if (usage_mask
& TGSI_WRITEMASK_W
) {
4703 /* we need to replace use of 'w' with '1/w' */
4704 emit
->fs
.fragcoord_input_index
= i
;
4707 else if (semantic_name
== TGSI_SEMANTIC_FACE
) {
4708 /* fragment front-facing input */
4709 type
= VGPU10_OPCODE_DCL_INPUT_PS_SGV
;
4710 interpolationMode
= VGPU10_INTERPOLATION_CONSTANT
;
4711 name
= VGPU10_NAME_IS_FRONT_FACE
;
4712 emit
->fs
.face_input_index
= i
;
4714 else if (semantic_name
== TGSI_SEMANTIC_PRIMID
) {
4716 type
= VGPU10_OPCODE_DCL_INPUT_PS_SGV
;
4717 interpolationMode
= VGPU10_INTERPOLATION_CONSTANT
;
4718 name
= VGPU10_NAME_PRIMITIVE_ID
;
4720 else if (semantic_name
== TGSI_SEMANTIC_SAMPLEID
) {
4721 /* sample index / ID */
4722 type
= VGPU10_OPCODE_DCL_INPUT_PS_SGV
;
4723 interpolationMode
= VGPU10_INTERPOLATION_CONSTANT
;
4724 name
= VGPU10_NAME_SAMPLE_INDEX
;
4726 else if (semantic_name
== TGSI_SEMANTIC_LAYER
) {
4727 /* render target array index */
4728 if (emit
->key
.fs
.layer_to_zero
) {
4730 * The shader from the previous stage does not write to layer,
4731 * so reading the layer index in fragment shader should return 0.
4733 emit
->fs
.layer_input_index
= i
;
4736 type
= VGPU10_OPCODE_DCL_INPUT_PS_SGV
;
4737 interpolationMode
= VGPU10_INTERPOLATION_CONSTANT
;
4738 name
= VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX
;
4739 mask
= VGPU10_OPERAND_4_COMPONENT_MASK_X
;
4742 else if (semantic_name
== TGSI_SEMANTIC_VIEWPORT_INDEX
) {
4743 /* viewport index */
4744 type
= VGPU10_OPCODE_DCL_INPUT_PS_SGV
;
4745 interpolationMode
= VGPU10_INTERPOLATION_CONSTANT
;
4746 name
= VGPU10_NAME_VIEWPORT_ARRAY_INDEX
;
4747 mask
= VGPU10_OPERAND_4_COMPONENT_MASK_X
;
4750 /* general fragment input */
4751 type
= VGPU10_OPCODE_DCL_INPUT_PS
;
// NOTE(review): the result of translate_interpolation() is presumably
// assigned to interpolationMode; the assignment is not visible in this
// view.
4753 translate_interpolation(emit
,
4754 emit
->info
.input_interpolate
[i
],
4755 emit
->info
.input_interpolate_loc
[i
]);
4757 /* keeps track if flat interpolation mode is being used */
4758 emit
->uses_flat_interp
= emit
->uses_flat_interp
||
4759 (interpolationMode
== VGPU10_INTERPOLATION_CONSTANT
);
4761 name
= VGPU10_NAME_UNDEFINED
;
4764 emit_input_declaration(emit
, type
,
4765 VGPU10_OPERAND_TYPE_INPUT
,
4766 VGPU10_OPERAND_INDEX_1D
, index
, 1,
4768 VGPU10_OPERAND_4_COMPONENT
,
4769 VGPU10_OPERAND_4_COMPONENT_MASK_MODE
,
4771 interpolationMode
, TRUE
,
4772 map_tgsi_semantic_to_sgn_name(semantic_name
));
4778 * Emit input declarations for vertex shader.
// emit_vs_input_declarations: declare the inputs of a vertex shader.
//
// Visits input registers 0 .. emit->info.file_max[TGSI_FILE_INPUT] and, for
// each one with a non-zero usage mask, declares a 1D-indexed 4-component
// input with the full component mask, no system name, undefined
// interpolation and an undefined signature semantic name.
// NOTE(review): the declarations of i and index are not visible in this
// view, so how index is derived from i cannot be confirmed from here.
4781 emit_vs_input_declarations(struct svga_shader_emitter_v10
*emit
)
4785 for (i
= 0; i
< emit
->info
.file_max
[TGSI_FILE_INPUT
] + 1; i
++) {
4786 unsigned usage_mask
= emit
->info
.input_usage_mask
[i
];
4789 if (usage_mask
== 0)
4790 continue; /* register is not actually used */
4792 emit_input_declaration(emit
, VGPU10_OPCODE_DCL_INPUT
,
4793 VGPU10_OPERAND_TYPE_INPUT
,
4794 VGPU10_OPERAND_INDEX_1D
, index
, 1,
4795 VGPU10_NAME_UNDEFINED
,
4796 VGPU10_OPERAND_4_COMPONENT
,
4797 VGPU10_OPERAND_4_COMPONENT_MASK_MODE
,
4798 VGPU10_OPERAND_4_COMPONENT_MASK_ALL
,
4799 VGPU10_INTERPOLATION_UNDEFINED
, TRUE
,
4800 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED
);
4806 * Emit input declarations for geometry shader.
// emit_gs_input_declarations: declare the inputs of a geometry shader.
//
// For each of the emit->info.num_inputs inputs with a non-zero usage mask,
// starts from a plain 2D-indexed, 4-component VGPU10_OPCODE_DCL_INPUT
// declaration and adjusts it by semantic:
//   PRIMID   - switches to the primitive id operand type with 0D indexing
//              and zero components, and saves i in emit->gs.prim_id_index.
//   POSITION - switches to VGPU10_OPCODE_DCL_INPUT_SIV with
//              VGPU10_NAME_POSITION.
// The declaration is emitted with emit_input_declaration(); among the
// visible arguments are emit->gs.input_size and the full component mask.
// NOTE(review): the declarations of i, dim and name and several arguments
// of the final call are not visible in this view.
4809 emit_gs_input_declarations(struct svga_shader_emitter_v10
*emit
)
4813 for (i
= 0; i
< emit
->info
.num_inputs
; i
++) {
4814 enum tgsi_semantic semantic_name
= emit
->info
.input_semantic_name
[i
];
4815 unsigned usage_mask
= emit
->info
.input_usage_mask
[i
];
4816 unsigned index
= emit
->linkage
.input_map
[i
];
4817 unsigned opcodeType
, operandType
;
4818 unsigned numComp
, selMode
;
4822 if (usage_mask
== 0)
4823 continue; /* register is not actually used */
// Defaults for a generic input; the branches below override them.
4825 opcodeType
= VGPU10_OPCODE_DCL_INPUT
;
4826 operandType
= VGPU10_OPERAND_TYPE_INPUT
;
4827 numComp
= VGPU10_OPERAND_4_COMPONENT
;
4828 selMode
= VGPU10_OPERAND_4_COMPONENT_MASK_MODE
;
4829 name
= VGPU10_NAME_UNDEFINED
;
4831 /* all geometry shader inputs are two dimensional except
4834 dim
= VGPU10_OPERAND_INDEX_2D
;
4836 if (semantic_name
== TGSI_SEMANTIC_PRIMID
) {
4838 operandType
= VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID
;
4839 dim
= VGPU10_OPERAND_INDEX_0D
;
4840 numComp
= VGPU10_OPERAND_0_COMPONENT
;
4843 /* also save the register index so we can check for
4844 * primitive id when emit src register. We need to modify the
4845 * operand type, index dimension when emit primitive id src reg.
4847 emit
->gs
.prim_id_index
= i
;
4849 else if (semantic_name
== TGSI_SEMANTIC_POSITION
) {
4850 /* vertex position input */
4851 opcodeType
= VGPU10_OPCODE_DCL_INPUT_SIV
;
4852 name
= VGPU10_NAME_POSITION
;
4855 emit_input_declaration(emit
, opcodeType
, operandType
,
4857 emit
->gs
.input_size
,
4860 VGPU10_OPERAND_4_COMPONENT_MASK_ALL
,
4861 VGPU10_INTERPOLATION_UNDEFINED
, TRUE
,
4862 map_tgsi_semantic_to_sgn_name(semantic_name
));
4868 * Emit input declarations for tessellation control shader.
// emit_tcs_input_declarations: declare the inputs of a tessellation control
// shader.
//
// Each input is declared as a 2D-indexed 4-component register; size is
// initialised from emit->key.tcs.vertices_per_patch. Per input:
//   - the bit for its mapped register is set in indicesMask;
//   - a position input (by semantic, or because its register equals
//     emit->linkage.position_index) is saved in
//     emit->tcs.control_point_input_index;
//   - other inputs with a zero usage mask are skipped;
//   - CLIPDIST inputs get an undefined signature semantic name;
//   - outside the control point phase the operand type becomes
//     VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT, and whether a signature entry
//     is added follows emit->tcs.fork_phase_add_signature.
// In the control point phase, if no control point input was found and the
// position register was not declared, the position register is appended to
// emit->linkage.input_map and declared; an address register is then
// reserved in emit->tcs.control_point_addr_index.
// NOTE(review): lines are missing from this view (declaration of i, braces
// and some call arguments), so the exact nesting of the final statements
// should be confirmed against the complete source.
4871 emit_tcs_input_declarations(struct svga_shader_emitter_v10
*emit
)
4874 unsigned size
= emit
->key
.tcs
.vertices_per_patch
;
4875 unsigned indicesMask
= 0;
4876 boolean addSignature
= TRUE
;
4878 if (!emit
->tcs
.control_point_phase
)
4879 addSignature
= emit
->tcs
.fork_phase_add_signature
;
4881 for (i
= 0; i
< emit
->info
.num_inputs
; i
++) {
4882 unsigned usage_mask
= emit
->info
.input_usage_mask
[i
];
4883 unsigned index
= emit
->linkage
.input_map
[i
];
4884 enum tgsi_semantic semantic_name
= emit
->info
.input_semantic_name
[i
];
4885 VGPU10_SYSTEM_NAME name
= VGPU10_NAME_UNDEFINED
;
4886 VGPU10_OPERAND_TYPE operandType
= VGPU10_OPERAND_TYPE_INPUT
;
4887 SVGA3dDXSignatureSemanticName sgn_name
=
4888 map_tgsi_semantic_to_sgn_name(semantic_name
);
4890 /* indices that are declared */
// NOTE(review): 1 is a signed int here, so an index of 31 or more would
// make this shift undefined - confirm input_map values stay below that.
4891 indicesMask
|= 1 << index
;
4893 if (semantic_name
== TGSI_SEMANTIC_POSITION
||
4894 index
== emit
->linkage
.position_index
) {
4895 /* save the input control point index for later use */
4896 emit
->tcs
.control_point_input_index
= i
;
4898 else if (usage_mask
== 0) {
4899 continue; /* register is not actually used */
4901 else if (semantic_name
== TGSI_SEMANTIC_CLIPDIST
) {
4902 /* The shadow copy is being used here. So set the signature name
4905 sgn_name
= SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED
;
4908 /* input control points in the patch constant phase are emitted in the
4909 * vicp register rather than the v register.
4911 if (!emit
->tcs
.control_point_phase
) {
4912 operandType
= VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT
;
4915 /* Tessellation control shader inputs are two dimensional.
4916 * The array size is determined by the patch vertex count.
4918 emit_input_declaration(emit
, VGPU10_OPCODE_DCL_INPUT
,
4920 VGPU10_OPERAND_INDEX_2D
,
4922 VGPU10_OPERAND_4_COMPONENT
,
4923 VGPU10_OPERAND_4_COMPONENT_MASK_MODE
,
4924 VGPU10_OPERAND_4_COMPONENT_MASK_ALL
,
4925 VGPU10_INTERPOLATION_UNDEFINED
,
4926 addSignature
, sgn_name
);
4929 if (emit
->tcs
.control_point_phase
) {
4930 if (emit
->tcs
.control_point_input_index
== INVALID_INDEX
) {
4932 /* Add input control point declaration if it does not exist */
4933 if ((indicesMask
& (1 << emit
->linkage
.position_index
)) == 0) {
4934 emit
->linkage
.input_map
[emit
->linkage
.num_inputs
] =
4935 emit
->linkage
.position_index
;
4936 emit
->tcs
.control_point_input_index
= emit
->linkage
.num_inputs
++;
4938 emit_input_declaration(emit
, VGPU10_OPCODE_DCL_INPUT
,
4939 VGPU10_OPERAND_TYPE_INPUT
,
4940 VGPU10_OPERAND_INDEX_2D
,
4941 emit
->linkage
.position_index
,
4942 emit
->key
.tcs
.vertices_per_patch
,
4943 VGPU10_NAME_UNDEFINED
,
4944 VGPU10_OPERAND_4_COMPONENT
,
4945 VGPU10_OPERAND_4_COMPONENT_MASK_MODE
,
4946 VGPU10_OPERAND_4_COMPONENT_MASK_ALL
,
4947 VGPU10_INTERPOLATION_UNDEFINED
, TRUE
,
4948 SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION
);
4952 /* Also add an address register for the indirection to the
4953 * input control points
4955 emit
->tcs
.control_point_addr_index
= emit
->num_address_regs
++;
// emit_tessfactor_input_declarations: declare the tessellation factor
// inputs of a tessellation evaluation shader.
//
// Registers are assigned consecutively starting at
// emit->key.tes.tessfactor_index, each declared through
// emit_tesslevel_declaration() as a VGPU10_OPCODE_DCL_INPUT_SIV on a patch
// constant operand. By emit->tes.prim_mode:
//   QUADS     - 4 outer factors when key.tes.need_tessouter is set, then
//               the U and V inside factors when key.tes.need_tessinner is
//               set.
//   TRIANGLES - 3 outer factors when need_tessouter is set, then 1 inside
//               factor when need_tessinner is set.
//   LINES     - the line detail and line density factors when
//               need_tessouter is set.
// The first register of each group is saved in emit->tes.outer.in_index or
// emit->tes.inner.in_index.
// NOTE(review): closing braces are not visible in this view, so the nesting
// described above is inferred from statement order - confirm against the
// complete source.
4961 emit_tessfactor_input_declarations(struct svga_shader_emitter_v10
*emit
)
4964 /* In tcs, tess factors are emitted as extra outputs.
4965 * The starting register index for the tess factors is captured
4966 * in the compile key.
4968 unsigned inputIndex
= emit
->key
.tes
.tessfactor_index
;
4970 if (emit
->tes
.prim_mode
== PIPE_PRIM_QUADS
) {
4971 if (emit
->key
.tes
.need_tessouter
) {
4972 emit
->tes
.outer
.in_index
= inputIndex
;
4973 for (int i
= 0; i
< 4; i
++) {
4974 emit_tesslevel_declaration(emit
, inputIndex
++,
4975 VGPU10_OPCODE_DCL_INPUT_SIV
,
4976 VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT
,
4977 VGPU10_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR
+ i
,
4978 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR
+ i
);
4982 if (emit
->key
.tes
.need_tessinner
) {
4983 emit
->tes
.inner
.in_index
= inputIndex
;
4984 emit_tesslevel_declaration(emit
, inputIndex
++,
4985 VGPU10_OPCODE_DCL_INPUT_SIV
,
4986 VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT
,
4987 VGPU10_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR
,
4988 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR
);
4990 emit_tesslevel_declaration(emit
, inputIndex
++,
4991 VGPU10_OPCODE_DCL_INPUT_SIV
,
4992 VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT
,
4993 VGPU10_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR
,
4994 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR
);
4997 else if (emit
->tes
.prim_mode
== PIPE_PRIM_TRIANGLES
) {
4998 if (emit
->key
.tes
.need_tessouter
) {
4999 emit
->tes
.outer
.in_index
= inputIndex
;
5000 for (int i
= 0; i
< 3; i
++) {
5001 emit_tesslevel_declaration(emit
, inputIndex
++,
5002 VGPU10_OPCODE_DCL_INPUT_SIV
,
5003 VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT
,
5004 VGPU10_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR
+ i
,
5005 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR
+ i
);
5009 if (emit
->key
.tes
.need_tessinner
) {
5010 emit
->tes
.inner
.in_index
= inputIndex
;
5011 emit_tesslevel_declaration(emit
, inputIndex
++,
5012 VGPU10_OPCODE_DCL_INPUT_SIV
,
5013 VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT
,
5014 VGPU10_NAME_FINAL_TRI_INSIDE_TESSFACTOR
,
5015 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_INSIDE_TESSFACTOR
);
5018 else if (emit
->tes
.prim_mode
== PIPE_PRIM_LINES
) {
5019 if (emit
->key
.tes
.need_tessouter
) {
5020 emit
->tes
.outer
.in_index
= inputIndex
;
5021 emit_tesslevel_declaration(emit
, inputIndex
++,
5022 VGPU10_OPCODE_DCL_INPUT_SIV
,
5023 VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT
,
5024 VGPU10_NAME_FINAL_LINE_DETAIL_TESSFACTOR
,
5025 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DETAIL_TESSFACTOR
);
5027 emit_tesslevel_declaration(emit
, inputIndex
++,
5028 VGPU10_OPCODE_DCL_INPUT_SIV
,
5029 VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT
,
5030 VGPU10_NAME_FINAL_LINE_DENSITY_TESSFACTOR
,
5031 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DENSITY_TESSFACTOR
);
5038 * Emit input declarations for tessellation evaluation shader.
// emit_tes_input_declarations: declare the inputs of a tessellation
// evaluation shader.
//
// Step 1 - for each of the emit->info.num_inputs inputs (a zero usage mask
//   is raised to 1 first):
//     PATCH semantic  - patch constant operand, 1D indexing, signature name
//                       from map_tgsi_semantic_to_sgn_name();
//     other semantics - control point operand, 2D indexing sized by
//                       emit->key.tes.vertices_per_patch, undefined
//                       signature name.
// Step 2 - the tess factor inputs are declared by
//   emit_tessfactor_input_declarations().
// Step 3 - when this shader has fewer linked inputs than the previous
//   shader has outputs, every previous-stage output mapped to a register
//   above emit->linkage.input_map_max is declared as well (patch constant
//   for PATCH; control point for anything other than TESSINNER and
//   TESSOUTER).
// NOTE(review): lines are missing from this view (declarations of i and
// size, else keywords, the size assignment for the PATCH branch and the
// addSignature argument of each call); confirm against the complete source.
5041 emit_tes_input_declarations(struct svga_shader_emitter_v10
*emit
)
5045 for (i
= 0; i
< emit
->info
.num_inputs
; i
++) {
5046 unsigned usage_mask
= emit
->info
.input_usage_mask
[i
];
5047 unsigned index
= emit
->linkage
.input_map
[i
];
5049 const enum tgsi_semantic semantic_name
=
5050 emit
->info
.input_semantic_name
[i
];
5051 SVGA3dDXSignatureSemanticName sgn_name
;
5052 VGPU10_OPERAND_TYPE operandType
;
5053 VGPU10_OPERAND_INDEX_DIMENSION dim
;
5055 if (usage_mask
== 0)
5056 usage_mask
= 1; /* at least set usage mask to one */
5058 if (semantic_name
== TGSI_SEMANTIC_PATCH
) {
5059 operandType
= VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT
;
5060 dim
= VGPU10_OPERAND_INDEX_1D
;
5062 sgn_name
= map_tgsi_semantic_to_sgn_name(semantic_name
);
5065 operandType
= VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT
;
5066 dim
= VGPU10_OPERAND_INDEX_2D
;
5067 size
= emit
->key
.tes
.vertices_per_patch
;
5068 sgn_name
= SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED
;
5071 emit_input_declaration(emit
, VGPU10_OPCODE_DCL_INPUT
, operandType
,
5072 dim
, index
, size
, VGPU10_NAME_UNDEFINED
,
5073 VGPU10_OPERAND_4_COMPONENT
,
5074 VGPU10_OPERAND_4_COMPONENT_MASK_MODE
,
5075 VGPU10_OPERAND_4_COMPONENT_MASK_ALL
,
5076 VGPU10_INTERPOLATION_UNDEFINED
,
5080 emit_tessfactor_input_declarations(emit
);
5082 /* DX spec requires DS input controlpoint/patch-constant signatures to match
5083 * the HS output controlpoint/patch-constant signatures exactly.
5084 * Add missing input declarations even if they are not used in the shader.
5086 if (emit
->linkage
.num_inputs
< emit
->linkage
.prevShader
.num_outputs
) {
5087 struct tgsi_shader_info
*prevInfo
= emit
->prevShaderInfo
;
5088 for (i
= 0; i
< emit
->linkage
.prevShader
.num_outputs
; i
++) {
5090 /* If a tcs output does not have a corresponding input register in
// Only outputs mapped to a register above linkage.input_map_max are
// declared here; note that the loop counter i is what gets passed as the
// register index in the calls below.
5093 if (emit
->linkage
.prevShader
.output_map
[i
] >
5094 emit
->linkage
.input_map_max
) {
5095 const enum tgsi_semantic sem_name
= prevInfo
->output_semantic_name
[i
];
5097 if (sem_name
== TGSI_SEMANTIC_PATCH
) {
5098 emit_input_declaration(emit
, VGPU10_OPCODE_DCL_INPUT
,
5099 VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT
,
5100 VGPU10_OPERAND_INDEX_1D
,
5101 i
, 1, VGPU10_NAME_UNDEFINED
,
5102 VGPU10_OPERAND_4_COMPONENT
,
5103 VGPU10_OPERAND_4_COMPONENT_MASK_MODE
,
5104 VGPU10_OPERAND_4_COMPONENT_MASK_ALL
,
5105 VGPU10_INTERPOLATION_UNDEFINED
,
5107 map_tgsi_semantic_to_sgn_name(sem_name
));
5109 } else if (sem_name
!= TGSI_SEMANTIC_TESSINNER
&&
5110 sem_name
!= TGSI_SEMANTIC_TESSOUTER
) {
5111 emit_input_declaration(emit
, VGPU10_OPCODE_DCL_INPUT
,
5112 VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT
,
5113 VGPU10_OPERAND_INDEX_2D
,
5114 i
, emit
->key
.tes
.vertices_per_patch
,
5115 VGPU10_NAME_UNDEFINED
,
5116 VGPU10_OPERAND_4_COMPONENT
,
5117 VGPU10_OPERAND_4_COMPONENT_MASK_MODE
,
5118 VGPU10_OPERAND_4_COMPONENT_MASK_ALL
,
5119 VGPU10_INTERPOLATION_UNDEFINED
,
5121 map_tgsi_semantic_to_sgn_name(sem_name
));
5123 /* tessellation factors are taken care of in
5124 * emit_tessfactor_input_declarations().
5133 * Emit all input declarations.
// emit_input_declarations: emit the input declarations for the current
// shader stage.
//
// emit->index_range.required is set to TRUE for the duration of the call
// when emit->info.indirect_files has the TGSI_FILE_INPUT bit set. The
// per-stage function is then selected by emit->unit; the compute case only
// holds a commented-out call. Afterwards, if emit->index_range.start_index
// is not INVALID_INDEX, emit_index_range_declaration() is called, and
// index_range.required is reset to FALSE.
// NOTE(review): break statements, any default case and the return
// statement are not visible in this view.
5136 emit_input_declarations(struct svga_shader_emitter_v10
*emit
)
5138 emit
->index_range
.required
=
5139 emit
->info
.indirect_files
& (1 << TGSI_FILE_INPUT
) ? TRUE
: FALSE
;
5141 switch (emit
->unit
) {
5142 case PIPE_SHADER_FRAGMENT
:
5143 emit_fs_input_declarations(emit
);
5145 case PIPE_SHADER_GEOMETRY
:
5146 emit_gs_input_declarations(emit
);
5148 case PIPE_SHADER_VERTEX
:
5149 emit_vs_input_declarations(emit
);
5151 case PIPE_SHADER_TESS_CTRL
:
5152 emit_tcs_input_declarations(emit
);
5154 case PIPE_SHADER_TESS_EVAL
:
5155 emit_tes_input_declarations(emit
);
5157 case PIPE_SHADER_COMPUTE
:
5158 //XXX emit_cs_input_declarations(emit);
5164 if (emit
->index_range
.start_index
!= INVALID_INDEX
) {
5165 emit_index_range_declaration(emit
);
5167 emit
->index_range
.required
= FALSE
;
5173 * Emit all output declarations.
// emit_output_declarations: emit the output declarations for the current
// shader stage.
//
// emit->index_range.required is set to TRUE for the duration of the call
// when emit->info.indirect_files has the TGSI_FILE_OUTPUT bit set. The
// per-stage function is selected by emit->unit; the compute case only holds
// a commented-out call. Extra outputs are declared afterwards:
//   - when emit->vposition.so_index and emit->vposition.out_index are both
//     valid, an output at so_index with the POSITION signature name;
//   - when emit->clip_dist_so_index and emit->clip_dist_out_index are both
//     valid, an output at clip_dist_so_index using the usage mask of the
//     original clip distance output, plus a second one at
//     clip_dist_so_index + 1 when emit->info.num_written_clipdistance
//     exceeds 4.
// Finally, if emit->index_range.start_index is not INVALID_INDEX,
// emit_index_range_declaration() is called, and index_range.required is
// reset to FALSE.
// NOTE(review): break statements, the addSignature argument of the
// emit_output_declaration() calls and the return statement are not visible
// in this view.
5176 emit_output_declarations(struct svga_shader_emitter_v10
*emit
)
5178 emit
->index_range
.required
=
5179 emit
->info
.indirect_files
& (1 << TGSI_FILE_OUTPUT
) ? TRUE
: FALSE
;
5181 switch (emit
->unit
) {
5182 case PIPE_SHADER_FRAGMENT
:
5183 emit_fs_output_declarations(emit
);
5185 case PIPE_SHADER_GEOMETRY
:
5186 emit_gs_output_declarations(emit
);
5188 case PIPE_SHADER_VERTEX
:
5189 emit_vs_output_declarations(emit
);
5191 case PIPE_SHADER_TESS_CTRL
:
5192 emit_tcs_output_declarations(emit
);
5194 case PIPE_SHADER_TESS_EVAL
:
5195 emit_tes_output_declarations(emit
);
5197 case PIPE_SHADER_COMPUTE
:
5198 //XXX emit_cs_output_declarations(emit);
5204 if (emit
->vposition
.so_index
!= INVALID_INDEX
&&
5205 emit
->vposition
.out_index
!= INVALID_INDEX
) {
5207 assert(emit
->unit
!= PIPE_SHADER_FRAGMENT
);
5209 /* Emit the declaration for the non-adjusted vertex position
5210 * for stream output purpose
5212 emit_output_declaration(emit
, VGPU10_OPCODE_DCL_OUTPUT
,
5213 emit
->vposition
.so_index
,
5214 VGPU10_NAME_UNDEFINED
,
5215 VGPU10_OPERAND_4_COMPONENT_MASK_ALL
,
5217 SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION
);
5220 if (emit
->clip_dist_so_index
!= INVALID_INDEX
&&
5221 emit
->clip_dist_out_index
!= INVALID_INDEX
) {
5223 assert(emit
->unit
!= PIPE_SHADER_FRAGMENT
);
5225 /* Emit the declaration for the clip distance shadow copy which
5226 * will be used for stream output purpose and for clip distance
5229 emit_output_declaration(emit
, VGPU10_OPCODE_DCL_OUTPUT
,
5230 emit
->clip_dist_so_index
,
5231 VGPU10_NAME_UNDEFINED
,
5232 emit
->output_usage_mask
[emit
->clip_dist_out_index
],
5234 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED
);
5236 if (emit
->info
.num_written_clipdistance
> 4) {
5237 /* for the second clip distance register, each handles 4 planes */
5238 emit_output_declaration(emit
, VGPU10_OPCODE_DCL_OUTPUT
,
5239 emit
->clip_dist_so_index
+ 1,
5240 VGPU10_NAME_UNDEFINED
,
5241 emit
->output_usage_mask
[emit
->clip_dist_out_index
+1],
5243 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED
);
5247 if (emit
->index_range
.start_index
!= INVALID_INDEX
) {
5248 emit_index_range_declaration(emit
);
5250 emit
->index_range
.required
= FALSE
;
5256 * A helper function to create a temporary indexable array
5257 * and initialize the corresponding entries in the temp_map array.
5260 create_temp_array(struct svga_shader_emitter_v10
*emit
,
5261 unsigned arrayID
, unsigned first
, unsigned count
,
5262 unsigned startIndex
)
5264 unsigned i
, tempIndex
= startIndex
;
5266 emit
->num_temp_arrays
= MAX2(emit
->num_temp_arrays
, arrayID
+ 1);
5267 assert(emit
->num_temp_arrays
<= MAX_TEMP_ARRAYS
);
5268 emit
->num_temp_arrays
= MIN2(emit
->num_temp_arrays
, MAX_TEMP_ARRAYS
);
5270 emit
->temp_arrays
[arrayID
].start
= first
;
5271 emit
->temp_arrays
[arrayID
].size
= count
;
5273 /* Fill in the temp_map entries for this temp array */
5274 for (i
= 0; i
< count
; i
++, tempIndex
++) {
5275 emit
->temp_map
[tempIndex
].arrayId
= arrayID
;
5276 emit
->temp_map
[tempIndex
].index
= i
;
5282 * Emit the declaration for the temporary registers.
5285 emit_temporaries_declaration(struct svga_shader_emitter_v10
*emit
)
5287 unsigned total_temps
, reg
, i
;
5289 total_temps
= emit
->num_shader_temps
;
5291 /* If there is indirect access to non-indexable temps in the shader,
5292 * convert those temps to indexable temps. This works around a bug
5293 * in the GLSL->TGSI translator exposed in piglit test
5294 * glsl-1.20/execution/fs-const-array-of-struct-of-array.shader_test.
5295 * Internal temps added by the driver remain as non-indexable temps.
5297 if ((emit
->info
.indirect_files
& (1 << TGSI_FILE_TEMPORARY
)) &&
5298 emit
->num_temp_arrays
== 0) {
5299 create_temp_array(emit
, 1, 0, total_temps
, 0);
5302 /* Allocate extra temps for specially-implemented instructions,
5305 total_temps
+= MAX_INTERNAL_TEMPS
;
5307 /* Allocate extra temps for clip distance or clip vertex.
5309 if (emit
->clip_mode
== CLIP_DISTANCE
) {
5310 /* We need to write the clip distance to a temporary register
5311 * first. Then it will be copied to the shadow copy for
5312 * the clip distance varying variable and stream output purpose.
5313 * It will also be copied to the actual CLIPDIST register
5314 * according to the enabled clip planes
5316 emit
->clip_dist_tmp_index
= total_temps
++;
5317 if (emit
->info
.num_written_clipdistance
> 4)
5318 total_temps
++; /* second clip register */
5320 else if (emit
->clip_mode
== CLIP_VERTEX
&& emit
->key
.last_vertex_stage
) {
5321 /* If the current shader is in the last vertex processing stage,
5322 * We need to convert the TGSI CLIPVERTEX output to one or more
5323 * clip distances. Allocate a temp reg for the clipvertex here.
5325 assert(emit
->info
.writes_clipvertex
> 0);
5326 emit
->clip_vertex_tmp_index
= total_temps
;
5330 if (emit
->info
.uses_vertexid
) {
5331 assert(emit
->unit
== PIPE_SHADER_VERTEX
);
5332 emit
->vs
.vertex_id_tmp_index
= total_temps
++;
5335 if (emit
->unit
== PIPE_SHADER_VERTEX
|| emit
->unit
== PIPE_SHADER_GEOMETRY
) {
5336 if (emit
->vposition
.need_prescale
|| emit
->key
.vs
.undo_viewport
||
5337 emit
->key
.clip_plane_enable
||
5338 emit
->vposition
.so_index
!= INVALID_INDEX
) {
5339 emit
->vposition
.tmp_index
= total_temps
;
5343 if (emit
->vposition
.need_prescale
) {
5344 emit
->vposition
.prescale_scale_index
= total_temps
++;
5345 emit
->vposition
.prescale_trans_index
= total_temps
++;
5348 if (emit
->unit
== PIPE_SHADER_VERTEX
) {
5349 unsigned attrib_mask
= (emit
->key
.vs
.adjust_attrib_w_1
|
5350 emit
->key
.vs
.adjust_attrib_itof
|
5351 emit
->key
.vs
.adjust_attrib_utof
|
5352 emit
->key
.vs
.attrib_is_bgra
|
5353 emit
->key
.vs
.attrib_puint_to_snorm
|
5354 emit
->key
.vs
.attrib_puint_to_uscaled
|
5355 emit
->key
.vs
.attrib_puint_to_sscaled
);
5356 while (attrib_mask
) {
5357 unsigned index
= u_bit_scan(&attrib_mask
);
5358 emit
->vs
.adjusted_input
[index
] = total_temps
++;
5361 else if (emit
->unit
== PIPE_SHADER_GEOMETRY
) {
5362 if (emit
->key
.gs
.writes_viewport_index
)
5363 emit
->gs
.viewport_index_tmp_index
= total_temps
++;
5366 else if (emit
->unit
== PIPE_SHADER_FRAGMENT
) {
5367 if (emit
->key
.fs
.alpha_func
!= SVGA3D_CMP_ALWAYS
||
5368 emit
->key
.fs
.write_color0_to_n_cbufs
> 1) {
5369 /* Allocate a temp to hold the output color */
5370 emit
->fs
.color_tmp_index
= total_temps
;
5374 if (emit
->fs
.face_input_index
!= INVALID_INDEX
) {
5375 /* Allocate a temp for the +/-1 face register */
5376 emit
->fs
.face_tmp_index
= total_temps
;
5380 if (emit
->fs
.fragcoord_input_index
!= INVALID_INDEX
) {
5381 /* Allocate a temp for modified fragment position register */
5382 emit
->fs
.fragcoord_tmp_index
= total_temps
;
5386 if (emit
->fs
.sample_pos_sys_index
!= INVALID_INDEX
) {
5387 /* Allocate a temp for the sample position */
5388 emit
->fs
.sample_pos_tmp_index
= total_temps
++;
5391 else if (emit
->unit
== PIPE_SHADER_TESS_EVAL
) {
5392 if (emit
->vposition
.need_prescale
) {
5393 emit
->vposition
.tmp_index
= total_temps
++;
5394 emit
->vposition
.prescale_scale_index
= total_temps
++;
5395 emit
->vposition
.prescale_trans_index
= total_temps
++;
5398 if (emit
->tes
.inner
.tgsi_index
) {
5399 emit
->tes
.inner
.temp_index
= total_temps
;
5403 if (emit
->tes
.outer
.tgsi_index
) {
5404 emit
->tes
.outer
.temp_index
= total_temps
;
5408 else if (emit
->unit
== PIPE_SHADER_TESS_CTRL
) {
5409 if (emit
->tcs
.inner
.tgsi_index
!= INVALID_INDEX
) {
5410 if (!emit
->tcs
.control_point_phase
) {
5411 emit
->tcs
.inner
.temp_index
= total_temps
;
5415 if (emit
->tcs
.outer
.tgsi_index
!= INVALID_INDEX
) {
5416 if (!emit
->tcs
.control_point_phase
) {
5417 emit
->tcs
.outer
.temp_index
= total_temps
;
5422 if (emit
->tcs
.control_point_phase
&&
5423 emit
->info
.reads_pervertex_outputs
) {
5424 emit
->tcs
.control_point_tmp_index
= total_temps
;
5425 total_temps
+= emit
->tcs
.control_point_out_count
;
5427 else if (!emit
->tcs
.control_point_phase
&&
5428 emit
->info
.reads_perpatch_outputs
) {
5430 /* If there is indirect access to the patch constant outputs
5431 * in the control point phase, then an indexable temporary array
5432 * will be created for these patch constant outputs.
5433 * Note, indirect access can only be applicable to
5434 * patch constant outputs in the control point phase.
5436 if (emit
->info
.indirect_files
& (1 << TGSI_FILE_OUTPUT
)) {
5438 emit
->num_temp_arrays
? emit
->num_temp_arrays
: 1;
5439 create_temp_array(emit
, arrayID
, 0,
5440 emit
->tcs
.patch_generic_out_count
, total_temps
);
5442 emit
->tcs
.patch_generic_tmp_index
= total_temps
;
5443 total_temps
+= emit
->tcs
.patch_generic_out_count
;
5446 emit
->tcs
.invocation_id_tmp_index
= total_temps
++;
5449 for (i
= 0; i
< emit
->num_address_regs
; i
++) {
5450 emit
->address_reg_index
[i
] = total_temps
++;
5453 /* Initialize the temp_map array which maps TGSI temp indexes to VGPU10
5454 * temp indexes. Basically, we compact all the non-array temp register
5455 * indexes into a consecutive series.
5457 * Before, we may have some TGSI declarations like:
5458 * DCL TEMP[0..1], LOCAL
5459 * DCL TEMP[2..4], ARRAY(1), LOCAL
5460 * DCL TEMP[5..7], ARRAY(2), LOCAL
5461 * plus, some extra temps, like TEMP[8], TEMP[9] for misc things
5463 * After, we'll have a map like this:
5464 * temp_map[0] = { array 0, index 0 }
5465 * temp_map[1] = { array 0, index 1 }
5466 * temp_map[2] = { array 1, index 0 }
5467 * temp_map[3] = { array 1, index 1 }
5468 * temp_map[4] = { array 1, index 2 }
5469 * temp_map[5] = { array 2, index 0 }
5470 * temp_map[6] = { array 2, index 1 }
5471 * temp_map[7] = { array 2, index 2 }
5472 * temp_map[8] = { array 0, index 2 }
5473 * temp_map[9] = { array 0, index 3 }
5475 * We'll declare two arrays of 3 elements, plus a set of four non-indexed
5476 * temps numbered 0..3
5478 * Any time we emit a temporary register index, we'll have to use the
5479 * temp_map[] table to convert the TGSI index to the VGPU10 index.
5481 * Finally, we recompute the total_temps value here.
5484 for (i
= 0; i
< total_temps
; i
++) {
5485 if (emit
->temp_map
[i
].arrayId
== 0) {
5486 emit
->temp_map
[i
].index
= reg
++;
5491 debug_printf("total_temps %u\n", total_temps
);
5492 for (i
= 0; i
< total_temps
; i
++) {
5493 debug_printf("temp %u -> array %u index %u\n",
5494 i
, emit
->temp_map
[i
].arrayId
, emit
->temp_map
[i
].index
);
5500 /* Emit declaration of ordinary temp registers */
5501 if (total_temps
> 0) {
5502 VGPU10OpcodeToken0 opcode0
;
5505 opcode0
.opcodeType
= VGPU10_OPCODE_DCL_TEMPS
;
5507 begin_emit_instruction(emit
);
5508 emit_dword(emit
, opcode0
.value
);
5509 emit_dword(emit
, total_temps
);
5510 end_emit_instruction(emit
);
5513 /* Emit declarations for indexable temp arrays. Skip 0th entry since
5516 for (i
= 1; i
< emit
->num_temp_arrays
; i
++) {
5517 unsigned num_temps
= emit
->temp_arrays
[i
].size
;
5519 if (num_temps
> 0) {
5520 VGPU10OpcodeToken0 opcode0
;
5523 opcode0
.opcodeType
= VGPU10_OPCODE_DCL_INDEXABLE_TEMP
;
5525 begin_emit_instruction(emit
);
5526 emit_dword(emit
, opcode0
.value
);
5527 emit_dword(emit
, i
); /* which array */
5528 emit_dword(emit
, num_temps
);
5529 emit_dword(emit
, 4); /* num components */
5530 end_emit_instruction(emit
);
5532 total_temps
+= num_temps
;
5536 /* Check that the grand total of all regular and indexed temps is
5539 check_register_index(emit
, VGPU10_OPCODE_DCL_TEMPS
, total_temps
- 1);
5546 emit_constant_declaration(struct svga_shader_emitter_v10
*emit
)
5548 VGPU10OpcodeToken0 opcode0
;
5549 VGPU10OperandToken0 operand0
;
5550 unsigned total_consts
, i
;
5553 opcode0
.opcodeType
= VGPU10_OPCODE_DCL_CONSTANT_BUFFER
;
5554 opcode0
.accessPattern
= VGPU10_CB_IMMEDIATE_INDEXED
;
5555 /* XXX or, access pattern = VGPU10_CB_DYNAMIC_INDEXED */
5558 operand0
.numComponents
= VGPU10_OPERAND_4_COMPONENT
;
5559 operand0
.indexDimension
= VGPU10_OPERAND_INDEX_2D
;
5560 operand0
.index0Representation
= VGPU10_OPERAND_INDEX_IMMEDIATE32
;
5561 operand0
.index1Representation
= VGPU10_OPERAND_INDEX_IMMEDIATE32
;
5562 operand0
.operandType
= VGPU10_OPERAND_TYPE_CONSTANT_BUFFER
;
5563 operand0
.selectionMode
= VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE
;
5564 operand0
.swizzleX
= 0;
5565 operand0
.swizzleY
= 1;
5566 operand0
.swizzleZ
= 2;
5567 operand0
.swizzleW
= 3;
5570 * Emit declaration for constant buffer [0]. We also allocate
5571 * room for the extra constants here.
5573 total_consts
= emit
->num_shader_consts
[0];
5575 /* Now, allocate constant slots for the "extra" constants.
5576 * Note: it's critical that these extra constant locations
5577 * exactly match what's emitted by the "extra" constants code
5578 * in svga_state_constants.c
5581 /* Vertex position scale/translation */
5582 if (emit
->vposition
.need_prescale
) {
5583 emit
->vposition
.prescale_cbuf_index
= total_consts
;
5584 total_consts
+= (2 * emit
->vposition
.num_prescale
);
5587 if (emit
->unit
== PIPE_SHADER_VERTEX
) {
5588 if (emit
->key
.vs
.undo_viewport
) {
5589 emit
->vs
.viewport_index
= total_consts
++;
5591 if (emit
->key
.vs
.need_vertex_id_bias
) {
5592 emit
->vs
.vertex_id_bias_index
= total_consts
++;
5596 /* user-defined clip planes */
5597 if (emit
->key
.clip_plane_enable
) {
5598 unsigned n
= util_bitcount(emit
->key
.clip_plane_enable
);
5599 assert(emit
->unit
!= PIPE_SHADER_FRAGMENT
&&
5600 emit
->unit
!= PIPE_SHADER_COMPUTE
);
5601 for (i
= 0; i
< n
; i
++) {
5602 emit
->clip_plane_const
[i
] = total_consts
++;
5606 for (i
= 0; i
< emit
->num_samplers
; i
++) {
5608 if (emit
->sampler_view
[i
]) {
5610 /* Texcoord scale factors for RECT textures */
5611 if (emit
->key
.tex
[i
].unnormalized
) {
5612 emit
->texcoord_scale_index
[i
] = total_consts
++;
5615 /* Texture buffer sizes */
5616 if (emit
->sampler_target
[i
] == TGSI_TEXTURE_BUFFER
) {
5617 emit
->texture_buffer_size_index
[i
] = total_consts
++;
5622 if (total_consts
> 0) {
5623 begin_emit_instruction(emit
);
5624 emit_dword(emit
, opcode0
.value
);
5625 emit_dword(emit
, operand0
.value
);
5626 emit_dword(emit
, 0); /* which const buffer slot */
5627 emit_dword(emit
, total_consts
);
5628 end_emit_instruction(emit
);
5631 /* Declare remaining constant buffers (UBOs) */
5632 for (i
= 1; i
< ARRAY_SIZE(emit
->num_shader_consts
); i
++) {
5633 if (emit
->num_shader_consts
[i
] > 0) {
5634 begin_emit_instruction(emit
);
5635 emit_dword(emit
, opcode0
.value
);
5636 emit_dword(emit
, operand0
.value
);
5637 emit_dword(emit
, i
); /* which const buffer slot */
5638 emit_dword(emit
, emit
->num_shader_consts
[i
]);
5639 end_emit_instruction(emit
);
5648 * Emit declarations for samplers.
5651 emit_sampler_declarations(struct svga_shader_emitter_v10
*emit
)
5655 for (i
= 0; i
< emit
->num_samplers
; i
++) {
5656 VGPU10OpcodeToken0 opcode0
;
5657 VGPU10OperandToken0 operand0
;
5660 opcode0
.opcodeType
= VGPU10_OPCODE_DCL_SAMPLER
;
5661 opcode0
.samplerMode
= VGPU10_SAMPLER_MODE_DEFAULT
;
5664 operand0
.numComponents
= VGPU10_OPERAND_0_COMPONENT
;
5665 operand0
.operandType
= VGPU10_OPERAND_TYPE_SAMPLER
;
5666 operand0
.indexDimension
= VGPU10_OPERAND_INDEX_1D
;
5667 operand0
.index0Representation
= VGPU10_OPERAND_INDEX_IMMEDIATE32
;
5669 begin_emit_instruction(emit
);
5670 emit_dword(emit
, opcode0
.value
);
5671 emit_dword(emit
, operand0
.value
);
5672 emit_dword(emit
, i
);
5673 end_emit_instruction(emit
);
5681 * Translate TGSI_TEXTURE_x to VGPU10_RESOURCE_DIMENSION_x.
5684 tgsi_texture_to_resource_dimension(enum tgsi_texture_type target
,
5685 unsigned num_samples
,
5688 if (target
== TGSI_TEXTURE_2D_MSAA
&& num_samples
< 2) {
5689 target
= TGSI_TEXTURE_2D
;
5691 else if (target
== TGSI_TEXTURE_2D_ARRAY_MSAA
&& num_samples
< 2) {
5692 target
= TGSI_TEXTURE_2D_ARRAY
;
5696 case TGSI_TEXTURE_BUFFER
:
5697 return VGPU10_RESOURCE_DIMENSION_BUFFER
;
5698 case TGSI_TEXTURE_1D
:
5699 return VGPU10_RESOURCE_DIMENSION_TEXTURE1D
;
5700 case TGSI_TEXTURE_2D
:
5701 case TGSI_TEXTURE_RECT
:
5702 return VGPU10_RESOURCE_DIMENSION_TEXTURE2D
;
5703 case TGSI_TEXTURE_3D
:
5704 return VGPU10_RESOURCE_DIMENSION_TEXTURE3D
;
5705 case TGSI_TEXTURE_CUBE
:
5706 case TGSI_TEXTURE_SHADOWCUBE
:
5707 return VGPU10_RESOURCE_DIMENSION_TEXTURECUBE
;
5708 case TGSI_TEXTURE_SHADOW1D
:
5709 return VGPU10_RESOURCE_DIMENSION_TEXTURE1D
;
5710 case TGSI_TEXTURE_SHADOW2D
:
5711 case TGSI_TEXTURE_SHADOWRECT
:
5712 return VGPU10_RESOURCE_DIMENSION_TEXTURE2D
;
5713 case TGSI_TEXTURE_1D_ARRAY
:
5714 case TGSI_TEXTURE_SHADOW1D_ARRAY
:
5715 return is_array
? VGPU10_RESOURCE_DIMENSION_TEXTURE1DARRAY
5716 : VGPU10_RESOURCE_DIMENSION_TEXTURE1D
;
5717 case TGSI_TEXTURE_2D_ARRAY
:
5718 case TGSI_TEXTURE_SHADOW2D_ARRAY
:
5719 return is_array
? VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY
5720 : VGPU10_RESOURCE_DIMENSION_TEXTURE2D
;
5721 case TGSI_TEXTURE_2D_MSAA
:
5722 return VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS
;
5723 case TGSI_TEXTURE_2D_ARRAY_MSAA
:
5724 return is_array
? VGPU10_RESOURCE_DIMENSION_TEXTURE2DMSARRAY
5725 : VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS
;
5726 case TGSI_TEXTURE_CUBE_ARRAY
:
5727 case TGSI_TEXTURE_SHADOWCUBE_ARRAY
:
5728 return is_array
? VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY
5729 : VGPU10_RESOURCE_DIMENSION_TEXTURECUBE
;
5731 assert(!"Unexpected resource type");
5732 return VGPU10_RESOURCE_DIMENSION_TEXTURE2D
;
5738 * Given a tgsi_return_type, return true iff it is an integer type.
5741 is_integer_type(enum tgsi_return_type type
)
5744 case TGSI_RETURN_TYPE_SINT
:
5745 case TGSI_RETURN_TYPE_UINT
:
5747 case TGSI_RETURN_TYPE_FLOAT
:
5748 case TGSI_RETURN_TYPE_UNORM
:
5749 case TGSI_RETURN_TYPE_SNORM
:
5751 case TGSI_RETURN_TYPE_COUNT
:
5753 assert(!"is_integer_type: Unknown tgsi_return_type");
5760 * Emit declarations for resources.
5761 * XXX When we're sure that all TGSI shaders will be generated with
5762 * sampler view declarations (Ex: DCL SVIEW[n], 2D, UINT) we may
5766 emit_resource_declarations(struct svga_shader_emitter_v10
*emit
)
5770 /* Emit resource decl for each sampler */
5771 for (i
= 0; i
< emit
->num_samplers
; i
++) {
5772 VGPU10OpcodeToken0 opcode0
;
5773 VGPU10OperandToken0 operand0
;
5774 VGPU10ResourceReturnTypeToken return_type
;
5775 VGPU10_RESOURCE_RETURN_TYPE rt
;
5778 opcode0
.opcodeType
= VGPU10_OPCODE_DCL_RESOURCE
;
5779 opcode0
.resourceDimension
=
5780 tgsi_texture_to_resource_dimension(emit
->sampler_target
[i
],
5781 emit
->key
.tex
[i
].num_samples
,
5782 emit
->key
.tex
[i
].is_array
);
5783 opcode0
.sampleCount
= emit
->key
.tex
[i
].num_samples
;
5785 operand0
.numComponents
= VGPU10_OPERAND_0_COMPONENT
;
5786 operand0
.operandType
= VGPU10_OPERAND_TYPE_RESOURCE
;
5787 operand0
.indexDimension
= VGPU10_OPERAND_INDEX_1D
;
5788 operand0
.index0Representation
= VGPU10_OPERAND_INDEX_IMMEDIATE32
;
5791 /* convert TGSI_RETURN_TYPE_x to VGPU10_RETURN_TYPE_x */
5792 STATIC_ASSERT(VGPU10_RETURN_TYPE_UNORM
== TGSI_RETURN_TYPE_UNORM
+ 1);
5793 STATIC_ASSERT(VGPU10_RETURN_TYPE_SNORM
== TGSI_RETURN_TYPE_SNORM
+ 1);
5794 STATIC_ASSERT(VGPU10_RETURN_TYPE_SINT
== TGSI_RETURN_TYPE_SINT
+ 1);
5795 STATIC_ASSERT(VGPU10_RETURN_TYPE_UINT
== TGSI_RETURN_TYPE_UINT
+ 1);
5796 STATIC_ASSERT(VGPU10_RETURN_TYPE_FLOAT
== TGSI_RETURN_TYPE_FLOAT
+ 1);
5797 assert(emit
->sampler_return_type
[i
] <= TGSI_RETURN_TYPE_FLOAT
);
5798 rt
= emit
->sampler_return_type
[i
] + 1;
5800 switch (emit
->sampler_return_type
[i
]) {
5801 case TGSI_RETURN_TYPE_UNORM
: rt
= VGPU10_RETURN_TYPE_UNORM
; break;
5802 case TGSI_RETURN_TYPE_SNORM
: rt
= VGPU10_RETURN_TYPE_SNORM
; break;
5803 case TGSI_RETURN_TYPE_SINT
: rt
= VGPU10_RETURN_TYPE_SINT
; break;
5804 case TGSI_RETURN_TYPE_UINT
: rt
= VGPU10_RETURN_TYPE_UINT
; break;
5805 case TGSI_RETURN_TYPE_FLOAT
: rt
= VGPU10_RETURN_TYPE_FLOAT
; break;
5806 case TGSI_RETURN_TYPE_COUNT
:
5808 rt
= VGPU10_RETURN_TYPE_FLOAT
;
5809 assert(!"emit_resource_declarations: Unknown tgsi_return_type");
5813 return_type
.value
= 0;
5814 return_type
.component0
= rt
;
5815 return_type
.component1
= rt
;
5816 return_type
.component2
= rt
;
5817 return_type
.component3
= rt
;
5819 begin_emit_instruction(emit
);
5820 emit_dword(emit
, opcode0
.value
);
5821 emit_dword(emit
, operand0
.value
);
5822 emit_dword(emit
, i
);
5823 emit_dword(emit
, return_type
.value
);
5824 end_emit_instruction(emit
);
5831 * Emit instruction with n=1, 2 or 3 source registers.
5834 emit_instruction_opn(struct svga_shader_emitter_v10
*emit
,
5836 const struct tgsi_full_dst_register
*dst
,
5837 const struct tgsi_full_src_register
*src1
,
5838 const struct tgsi_full_src_register
*src2
,
5839 const struct tgsi_full_src_register
*src3
,
5840 boolean saturate
, bool precise
)
5842 begin_emit_instruction(emit
);
5843 emit_opcode_precise(emit
, opcode
, saturate
, precise
);
5844 emit_dst_register(emit
, dst
);
5845 emit_src_register(emit
, src1
);
5847 emit_src_register(emit
, src2
);
5850 emit_src_register(emit
, src3
);
5852 end_emit_instruction(emit
);
5856 emit_instruction_op1(struct svga_shader_emitter_v10
*emit
,
5858 const struct tgsi_full_dst_register
*dst
,
5859 const struct tgsi_full_src_register
*src
)
5861 emit_instruction_opn(emit
, opcode
, dst
, src
, NULL
, NULL
, FALSE
, FALSE
);
5865 emit_instruction_op2(struct svga_shader_emitter_v10
*emit
,
5866 VGPU10_OPCODE_TYPE opcode
,
5867 const struct tgsi_full_dst_register
*dst
,
5868 const struct tgsi_full_src_register
*src1
,
5869 const struct tgsi_full_src_register
*src2
)
5871 emit_instruction_opn(emit
, opcode
, dst
, src1
, src2
, NULL
, FALSE
, FALSE
);
5875 emit_instruction_op3(struct svga_shader_emitter_v10
*emit
,
5876 VGPU10_OPCODE_TYPE opcode
,
5877 const struct tgsi_full_dst_register
*dst
,
5878 const struct tgsi_full_src_register
*src1
,
5879 const struct tgsi_full_src_register
*src2
,
5880 const struct tgsi_full_src_register
*src3
)
5882 emit_instruction_opn(emit
, opcode
, dst
, src1
, src2
, src3
, FALSE
, FALSE
);
5886 emit_instruction_op0(struct svga_shader_emitter_v10
*emit
,
5887 VGPU10_OPCODE_TYPE opcode
)
5889 begin_emit_instruction(emit
);
5890 emit_opcode(emit
, opcode
, FALSE
);
5891 end_emit_instruction(emit
);
5895 * Tessellation inner/outer levels needs to be store into its
5896 * appropriate registers depending on prim_mode.
5899 store_tesslevels(struct svga_shader_emitter_v10
*emit
)
5903 /* tessellation levels are required input/out in hull shader.
5904 * emitting the inner/outer tessellation levels, either from
5905 * values provided in tcs or fallback default values which is 1.0
5907 if (emit
->key
.tcs
.prim_mode
== PIPE_PRIM_QUADS
) {
5908 struct tgsi_full_src_register temp_src
;
5910 if (emit
->tcs
.inner
.tgsi_index
!= INVALID_INDEX
)
5911 temp_src
= make_src_temp_reg(emit
->tcs
.inner
.temp_index
);
5913 temp_src
= make_immediate_reg_float(emit
, 1.0f
);
5915 for (i
= 0; i
< 2; i
++) {
5916 struct tgsi_full_src_register src
=
5917 scalar_src(&temp_src
, TGSI_SWIZZLE_X
+ i
);
5918 struct tgsi_full_dst_register dst
=
5919 make_dst_reg(TGSI_FILE_OUTPUT
, emit
->tcs
.inner
.out_index
+ i
);
5920 dst
= writemask_dst(&dst
, TGSI_WRITEMASK_X
);
5921 emit_instruction_op1(emit
, VGPU10_OPCODE_MOV
, &dst
, &src
);
5924 if (emit
->tcs
.outer
.tgsi_index
!= INVALID_INDEX
)
5925 temp_src
= make_src_temp_reg(emit
->tcs
.outer
.temp_index
);
5927 temp_src
= make_immediate_reg_float(emit
, 1.0f
);
5929 for (i
= 0; i
< 4; i
++) {
5930 struct tgsi_full_src_register src
=
5931 scalar_src(&temp_src
, TGSI_SWIZZLE_X
+ i
);
5932 struct tgsi_full_dst_register dst
=
5933 make_dst_reg(TGSI_FILE_OUTPUT
, emit
->tcs
.outer
.out_index
+ i
);
5934 dst
= writemask_dst(&dst
, TGSI_WRITEMASK_X
);
5935 emit_instruction_op1(emit
, VGPU10_OPCODE_MOV
, &dst
, &src
);
5938 else if (emit
->key
.tcs
.prim_mode
== PIPE_PRIM_TRIANGLES
) {
5939 struct tgsi_full_src_register temp_src
;
5941 if (emit
->tcs
.inner
.tgsi_index
!= INVALID_INDEX
)
5942 temp_src
= make_src_temp_reg(emit
->tcs
.inner
.temp_index
);
5944 temp_src
= make_immediate_reg_float(emit
, 1.0f
);
5946 struct tgsi_full_src_register src
=
5947 scalar_src(&temp_src
, TGSI_SWIZZLE_X
);
5948 struct tgsi_full_dst_register dst
=
5949 make_dst_reg(TGSI_FILE_OUTPUT
, emit
->tcs
.inner
.out_index
);
5950 dst
= writemask_dst(&dst
, TGSI_WRITEMASK_X
);
5951 emit_instruction_op1(emit
, VGPU10_OPCODE_MOV
, &dst
, &src
);
5953 if (emit
->tcs
.outer
.tgsi_index
!= INVALID_INDEX
)
5954 temp_src
= make_src_temp_reg(emit
->tcs
.outer
.temp_index
);
5956 temp_src
= make_immediate_reg_float(emit
, 1.0f
);
5958 for (i
= 0; i
< 3; i
++) {
5959 struct tgsi_full_src_register src
=
5960 scalar_src(&temp_src
, TGSI_SWIZZLE_X
+ i
);
5961 struct tgsi_full_dst_register dst
=
5962 make_dst_reg(TGSI_FILE_OUTPUT
, emit
->tcs
.outer
.out_index
+ i
);
5963 dst
= writemask_dst(&dst
, TGSI_WRITEMASK_X
);
5964 emit_instruction_op1(emit
, VGPU10_OPCODE_MOV
, &dst
, &src
);
5967 else if (emit
->key
.tcs
.prim_mode
== PIPE_PRIM_LINES
) {
5968 if (emit
->tcs
.outer
.tgsi_index
!= INVALID_INDEX
) {
5969 struct tgsi_full_src_register temp_src
=
5970 make_src_temp_reg(emit
->tcs
.outer
.temp_index
);
5971 for (i
= 0; i
< 2; i
++) {
5972 struct tgsi_full_src_register src
=
5973 scalar_src(&temp_src
, TGSI_SWIZZLE_X
+ i
);
5974 struct tgsi_full_dst_register dst
=
5975 make_dst_reg(TGSI_FILE_OUTPUT
,
5976 emit
->tcs
.outer
.out_index
+ i
);
5977 dst
= writemask_dst(&dst
, TGSI_WRITEMASK_X
);
5978 emit_instruction_op1(emit
, VGPU10_OPCODE_MOV
, &dst
, &src
);
5983 debug_printf("Unsupported primitive type");
5989 * Emit the actual clip distance instructions to be used for clipping
5990 * by copying the clip distance from the temporary registers to the
5991 * CLIPDIST registers written with the enabled planes mask.
5992 * Also copy the clip distance from the temporary to the clip distance
5993 * shadow copy register which will be referenced by the input shader
5996 emit_clip_distance_instructions(struct svga_shader_emitter_v10
*emit
)
5998 struct tgsi_full_src_register tmp_clip_dist_src
;
5999 struct tgsi_full_dst_register clip_dist_dst
;
6002 unsigned clip_plane_enable
= emit
->key
.clip_plane_enable
;
6003 unsigned clip_dist_tmp_index
= emit
->clip_dist_tmp_index
;
6004 int num_written_clipdist
= emit
->info
.num_written_clipdistance
;
6006 assert(emit
->clip_dist_out_index
!= INVALID_INDEX
);
6007 assert(emit
->clip_dist_tmp_index
!= INVALID_INDEX
);
6010 * Temporary reset the temporary clip dist register index so
6011 * that the copy to the real clip dist register will not
6012 * attempt to copy to the temporary register again
6014 emit
->clip_dist_tmp_index
= INVALID_INDEX
;
6016 for (i
= 0; i
< 2 && num_written_clipdist
> 0; i
++, num_written_clipdist
-=4) {
6018 tmp_clip_dist_src
= make_src_temp_reg(clip_dist_tmp_index
+ i
);
6021 * copy to the shadow copy for use by varying variable and
6022 * stream output. All clip distances
6023 * will be written regardless of the enabled clipping planes.
6025 clip_dist_dst
= make_dst_reg(TGSI_FILE_OUTPUT
,
6026 emit
->clip_dist_so_index
+ i
);
6028 /* MOV clip_dist_so, tmp_clip_dist */
6029 emit_instruction_op1(emit
, VGPU10_OPCODE_MOV
, &clip_dist_dst
,
6030 &tmp_clip_dist_src
);
6033 * copy those clip distances to enabled clipping planes
6034 * to CLIPDIST registers for clipping
6036 if (clip_plane_enable
& 0xf) {
6037 clip_dist_dst
= make_dst_reg(TGSI_FILE_OUTPUT
,
6038 emit
->clip_dist_out_index
+ i
);
6039 clip_dist_dst
= writemask_dst(&clip_dist_dst
, clip_plane_enable
& 0xf);
6041 /* MOV CLIPDIST, tmp_clip_dist */
6042 emit_instruction_op1(emit
, VGPU10_OPCODE_MOV
, &clip_dist_dst
,
6043 &tmp_clip_dist_src
);
6045 /* four clip planes per clip register */
6046 clip_plane_enable
>>= 4;
6049 * set the temporary clip dist register index back to the
6050 * temporary index for the next vertex
6052 emit
->clip_dist_tmp_index
= clip_dist_tmp_index
;
6055 /* Declare clip distance output registers for user-defined clip planes
6056 * or the TGSI_CLIPVERTEX output.
6059 emit_clip_distance_declarations(struct svga_shader_emitter_v10
*emit
)
6061 unsigned num_clip_planes
= util_bitcount(emit
->key
.clip_plane_enable
);
6062 unsigned index
= emit
->num_outputs
;
6063 unsigned plane_mask
;
6065 assert(emit
->unit
!= PIPE_SHADER_FRAGMENT
);
6066 assert(num_clip_planes
<= 8);
6068 if (emit
->clip_mode
!= CLIP_LEGACY
&&
6069 emit
->clip_mode
!= CLIP_VERTEX
) {
6073 if (num_clip_planes
== 0)
6076 /* Convert clip vertex to clip distances only in the last vertex stage */
6077 if (!emit
->key
.last_vertex_stage
)
6080 /* Declare one or two clip output registers. The number of components
6081 * in the mask reflects the number of clip planes. For example, if 5
6082 * clip planes are needed, we'll declare outputs similar to:
6083 * dcl_output_siv o2.xyzw, clip_distance
6084 * dcl_output_siv o3.x, clip_distance
6086 emit
->clip_dist_out_index
= index
; /* save the starting clip dist reg index */
6088 plane_mask
= (1 << num_clip_planes
) - 1;
6089 if (plane_mask
& 0xf) {
6090 unsigned cmask
= plane_mask
& VGPU10_OPERAND_4_COMPONENT_MASK_ALL
;
6091 emit_output_declaration(emit
, VGPU10_OPCODE_DCL_OUTPUT_SIV
, index
,
6092 VGPU10_NAME_CLIP_DISTANCE
, cmask
, TRUE
,
6093 SVGADX_SIGNATURE_SEMANTIC_NAME_CLIP_DISTANCE
);
6094 emit
->num_outputs
++;
6096 if (plane_mask
& 0xf0) {
6097 unsigned cmask
= (plane_mask
>> 4) & VGPU10_OPERAND_4_COMPONENT_MASK_ALL
;
6098 emit_output_declaration(emit
, VGPU10_OPCODE_DCL_OUTPUT_SIV
, index
+ 1,
6099 VGPU10_NAME_CLIP_DISTANCE
, cmask
, TRUE
,
6100 SVGADX_SIGNATURE_SEMANTIC_NAME_CLIP_DISTANCE
);
6101 emit
->num_outputs
++;
6107 * Emit the instructions for writing to the clip distance registers
6108 * to handle legacy/automatic clip planes.
6109 * For each clip plane, the distance is the dot product of the vertex
6110 * position (found in TEMP[vpos_tmp_index]) and the clip plane coefficients.
6111 * This is not used when the shader has an explicit CLIPVERTEX or CLIPDISTANCE
6112 * output registers already declared.
6115 emit_clip_distance_from_vpos(struct svga_shader_emitter_v10
*emit
,
6116 unsigned vpos_tmp_index
)
6118 unsigned i
, num_clip_planes
= util_bitcount(emit
->key
.clip_plane_enable
);
6120 assert(emit
->clip_mode
== CLIP_LEGACY
);
6121 assert(num_clip_planes
<= 8);
6123 assert(emit
->unit
== PIPE_SHADER_VERTEX
||
6124 emit
->unit
== PIPE_SHADER_GEOMETRY
||
6125 emit
->unit
== PIPE_SHADER_TESS_EVAL
);
6127 for (i
= 0; i
< num_clip_planes
; i
++) {
6128 struct tgsi_full_dst_register dst
;
6129 struct tgsi_full_src_register plane_src
, vpos_src
;
6130 unsigned reg_index
= emit
->clip_dist_out_index
+ i
/ 4;
6131 unsigned comp
= i
% 4;
6132 unsigned writemask
= VGPU10_OPERAND_4_COMPONENT_MASK_X
<< comp
;
6134 /* create dst, src regs */
6135 dst
= make_dst_reg(TGSI_FILE_OUTPUT
, reg_index
);
6136 dst
= writemask_dst(&dst
, writemask
);
6138 plane_src
= make_src_const_reg(emit
->clip_plane_const
[i
]);
6139 vpos_src
= make_src_temp_reg(vpos_tmp_index
);
6141 /* DP4 clip_dist, plane, vpos */
6142 emit_instruction_op2(emit
, VGPU10_OPCODE_DP4
, &dst
,
6143 &plane_src
, &vpos_src
);
6149 * Emit the instructions for computing the clip distance results from
6150 * the clip vertex temporary.
6151 * For each clip plane, the distance is the dot product of the clip vertex
6152 * position (found in a temp reg) and the clip plane coefficients.
6155 emit_clip_vertex_instructions(struct svga_shader_emitter_v10
*emit
)
6157 const unsigned num_clip
= util_bitcount(emit
->key
.clip_plane_enable
);
6159 struct tgsi_full_dst_register dst
;
6160 struct tgsi_full_src_register clipvert_src
;
6161 const unsigned clip_vertex_tmp
= emit
->clip_vertex_tmp_index
;
6163 assert(emit
->unit
== PIPE_SHADER_VERTEX
||
6164 emit
->unit
== PIPE_SHADER_GEOMETRY
||
6165 emit
->unit
== PIPE_SHADER_TESS_EVAL
);
6167 assert(emit
->clip_mode
== CLIP_VERTEX
);
6169 clipvert_src
= make_src_temp_reg(clip_vertex_tmp
);
6171 for (i
= 0; i
< num_clip
; i
++) {
6172 struct tgsi_full_src_register plane_src
;
6173 unsigned reg_index
= emit
->clip_dist_out_index
+ i
/ 4;
6174 unsigned comp
= i
% 4;
6175 unsigned writemask
= VGPU10_OPERAND_4_COMPONENT_MASK_X
<< comp
;
6177 /* create dst, src regs */
6178 dst
= make_dst_reg(TGSI_FILE_OUTPUT
, reg_index
);
6179 dst
= writemask_dst(&dst
, writemask
);
6181 plane_src
= make_src_const_reg(emit
->clip_plane_const
[i
]);
6183 /* DP4 clip_dist, plane, vpos */
6184 emit_instruction_op2(emit
, VGPU10_OPCODE_DP4
, &dst
,
6185 &plane_src
, &clipvert_src
);
6188 /* copy temporary clip vertex register to the clip vertex register */
6190 assert(emit
->clip_vertex_out_index
!= INVALID_INDEX
);
6193 * temporary reset the temporary clip vertex register index so
6194 * that copy to the clip vertex register will not attempt
6195 * to copy to the temporary register again
6197 emit
->clip_vertex_tmp_index
= INVALID_INDEX
;
6199 /* MOV clip_vertex, clip_vertex_tmp */
6200 dst
= make_dst_reg(TGSI_FILE_OUTPUT
, emit
->clip_vertex_out_index
);
6201 emit_instruction_op1(emit
, VGPU10_OPCODE_MOV
,
6202 &dst
, &clipvert_src
);
6205 * set the temporary clip vertex register index back to the
6206 * temporary index for the next vertex
6208 emit
->clip_vertex_tmp_index
= clip_vertex_tmp
;
6212 * Emit code to convert RGBA to BGRA
6215 emit_swap_r_b(struct svga_shader_emitter_v10
*emit
,
6216 const struct tgsi_full_dst_register
*dst
,
6217 const struct tgsi_full_src_register
*src
)
6219 struct tgsi_full_src_register bgra_src
=
6220 swizzle_src(src
, TGSI_SWIZZLE_Z
, TGSI_SWIZZLE_Y
, TGSI_SWIZZLE_X
, TGSI_SWIZZLE_W
);
6222 begin_emit_instruction(emit
);
6223 emit_opcode(emit
, VGPU10_OPCODE_MOV
, FALSE
);
6224 emit_dst_register(emit
, dst
);
6225 emit_src_register(emit
, &bgra_src
);
6226 end_emit_instruction(emit
);
6230 /** Convert from 10_10_10_2 normalized to 10_10_10_2_snorm */
6232 emit_puint_to_snorm(struct svga_shader_emitter_v10
*emit
,
6233 const struct tgsi_full_dst_register
*dst
,
6234 const struct tgsi_full_src_register
*src
)
6236 struct tgsi_full_src_register half
= make_immediate_reg_float(emit
, 0.5f
);
6237 struct tgsi_full_src_register two
=
6238 make_immediate_reg_float4(emit
, 2.0f
, 2.0f
, 2.0f
, 3.0f
);
6239 struct tgsi_full_src_register neg_two
=
6240 make_immediate_reg_float4(emit
, -2.0f
, -2.0f
, -2.0f
, -1.66666f
);
6242 unsigned val_tmp
= get_temp_index(emit
);
6243 struct tgsi_full_dst_register val_dst
= make_dst_temp_reg(val_tmp
);
6244 struct tgsi_full_src_register val_src
= make_src_temp_reg(val_tmp
);
6246 unsigned bias_tmp
= get_temp_index(emit
);
6247 struct tgsi_full_dst_register bias_dst
= make_dst_temp_reg(bias_tmp
);
6248 struct tgsi_full_src_register bias_src
= make_src_temp_reg(bias_tmp
);
6250 /* val = src * 2.0 */
6251 emit_instruction_op2(emit
, VGPU10_OPCODE_MUL
, &val_dst
, src
, &two
);
6253 /* bias = src > 0.5 */
6254 emit_instruction_op2(emit
, VGPU10_OPCODE_GE
, &bias_dst
, src
, &half
);
6256 /* bias = bias & -2.0 */
6257 emit_instruction_op2(emit
, VGPU10_OPCODE_AND
, &bias_dst
,
6258 &bias_src
, &neg_two
);
6260 /* dst = val + bias */
6261 emit_instruction_op2(emit
, VGPU10_OPCODE_ADD
, dst
,
6262 &val_src
, &bias_src
);
6264 free_temp_indexes(emit
);
6268 /** Convert from 10_10_10_2_unorm to 10_10_10_2_uscaled */
6270 emit_puint_to_uscaled(struct svga_shader_emitter_v10
*emit
,
6271 const struct tgsi_full_dst_register
*dst
,
6272 const struct tgsi_full_src_register
*src
)
6274 struct tgsi_full_src_register scale
=
6275 make_immediate_reg_float4(emit
, 1023.0f
, 1023.0f
, 1023.0f
, 3.0f
);
6277 /* dst = src * scale */
6278 emit_instruction_op2(emit
, VGPU10_OPCODE_MUL
, dst
, src
, &scale
);
6282 /** Convert from R32_UINT to 10_10_10_2_sscaled */
6284 emit_puint_to_sscaled(struct svga_shader_emitter_v10
*emit
,
6285 const struct tgsi_full_dst_register
*dst
,
6286 const struct tgsi_full_src_register
*src
)
6288 struct tgsi_full_src_register lshift
=
6289 make_immediate_reg_int4(emit
, 22, 12, 2, 0);
6290 struct tgsi_full_src_register rshift
=
6291 make_immediate_reg_int4(emit
, 22, 22, 22, 30);
6293 struct tgsi_full_src_register src_xxxx
= scalar_src(src
, TGSI_SWIZZLE_X
);
6295 unsigned tmp
= get_temp_index(emit
);
6296 struct tgsi_full_dst_register tmp_dst
= make_dst_temp_reg(tmp
);
6297 struct tgsi_full_src_register tmp_src
= make_src_temp_reg(tmp
);
6300 * r = (pixel << 22) >> 22; # signed int in [511, -512]
6301 * g = (pixel << 12) >> 22; # signed int in [511, -512]
6302 * b = (pixel << 2) >> 22; # signed int in [511, -512]
6303 * a = (pixel << 0) >> 30; # signed int in [1, -2]
6304 * dst = i_to_f(r,g,b,a); # convert to float
6306 emit_instruction_op2(emit
, VGPU10_OPCODE_ISHL
, &tmp_dst
,
6307 &src_xxxx
, &lshift
);
6308 emit_instruction_op2(emit
, VGPU10_OPCODE_ISHR
, &tmp_dst
,
6310 emit_instruction_op1(emit
, VGPU10_OPCODE_ITOF
, dst
, &tmp_src
);
6312 free_temp_indexes(emit
);
6317 * Emit code for TGSI_OPCODE_ARL or TGSI_OPCODE_UARL instruction.
6320 emit_arl_uarl(struct svga_shader_emitter_v10
*emit
,
6321 const struct tgsi_full_instruction
*inst
)
6323 unsigned index
= inst
->Dst
[0].Register
.Index
;
6324 struct tgsi_full_dst_register dst
;
6325 VGPU10_OPCODE_TYPE opcode
;
6327 assert(index
< MAX_VGPU10_ADDR_REGS
);
6328 dst
= make_dst_temp_reg(emit
->address_reg_index
[index
]);
6329 dst
= writemask_dst(&dst
, inst
->Dst
[0].Register
.WriteMask
);
6333 * FTOI address_tmp, s0
6337 * MOV address_tmp, s0
6339 if (inst
->Instruction
.Opcode
== TGSI_OPCODE_ARL
)
6340 opcode
= VGPU10_OPCODE_FTOI
;
6342 opcode
= VGPU10_OPCODE_MOV
;
6344 emit_instruction_op1(emit
, opcode
, &dst
, &inst
->Src
[0]);
6351 * Emit code for TGSI_OPCODE_CAL instruction.
6354 emit_cal(struct svga_shader_emitter_v10
*emit
,
6355 const struct tgsi_full_instruction
*inst
)
6357 unsigned label
= inst
->Label
.Label
;
6358 VGPU10OperandToken0 operand
;
6360 operand
.operandType
= VGPU10_OPERAND_TYPE_LABEL
;
6362 begin_emit_instruction(emit
);
6363 emit_dword(emit
, operand
.value
);
6364 emit_dword(emit
, label
);
6365 end_emit_instruction(emit
);
6372 * Emit code for TGSI_OPCODE_IABS instruction.
6375 emit_iabs(struct svga_shader_emitter_v10
*emit
,
6376 const struct tgsi_full_instruction
*inst
)
6378 /* dst.x = (src0.x < 0) ? -src0.x : src0.x
6379 * dst.y = (src0.y < 0) ? -src0.y : src0.y
6380 * dst.z = (src0.z < 0) ? -src0.z : src0.z
6381 * dst.w = (src0.w < 0) ? -src0.w : src0.w
6384 * IMAX dst, src, neg(src)
6386 struct tgsi_full_src_register neg_src
= negate_src(&inst
->Src
[0]);
6387 emit_instruction_op2(emit
, VGPU10_OPCODE_IMAX
, &inst
->Dst
[0],
6388 &inst
->Src
[0], &neg_src
);
6395 * Emit code for TGSI_OPCODE_CMP instruction.
6398 emit_cmp(struct svga_shader_emitter_v10
*emit
,
6399 const struct tgsi_full_instruction
*inst
)
6401 /* dst.x = (src0.x < 0) ? src1.x : src2.x
6402 * dst.y = (src0.y < 0) ? src1.y : src2.y
6403 * dst.z = (src0.z < 0) ? src1.z : src2.z
6404 * dst.w = (src0.w < 0) ? src1.w : src2.w
6408 * MOVC dst, tmp, src1, src2
6410 struct tgsi_full_src_register zero
= make_immediate_reg_float(emit
, 0.0f
);
6411 unsigned tmp
= get_temp_index(emit
);
6412 struct tgsi_full_src_register tmp_src
= make_src_temp_reg(tmp
);
6413 struct tgsi_full_dst_register tmp_dst
= make_dst_temp_reg(tmp
);
6415 emit_instruction_opn(emit
, VGPU10_OPCODE_LT
, &tmp_dst
,
6416 &inst
->Src
[0], &zero
, NULL
, FALSE
,
6417 inst
->Instruction
.Precise
);
6418 emit_instruction_opn(emit
, VGPU10_OPCODE_MOVC
, &inst
->Dst
[0],
6419 &tmp_src
, &inst
->Src
[1], &inst
->Src
[2],
6420 inst
->Instruction
.Saturate
, FALSE
);
6422 free_temp_indexes(emit
);
6429 * Emit code for TGSI_OPCODE_DST instruction.
6432 emit_dst(struct svga_shader_emitter_v10
*emit
,
6433 const struct tgsi_full_instruction
*inst
)
6437 * dst.y = src0.y * src1.y
6442 struct tgsi_full_src_register s0_yyyy
=
6443 scalar_src(&inst
->Src
[0], TGSI_SWIZZLE_Y
);
6444 struct tgsi_full_src_register s0_zzzz
=
6445 scalar_src(&inst
->Src
[0], TGSI_SWIZZLE_Z
);
6446 struct tgsi_full_src_register s1_yyyy
=
6447 scalar_src(&inst
->Src
[1], TGSI_SWIZZLE_Y
);
6448 struct tgsi_full_src_register s1_wwww
=
6449 scalar_src(&inst
->Src
[1], TGSI_SWIZZLE_W
);
6452 * If dst and either src0 and src1 are the same we need
6453 * to create a temporary for it and insert a extra move.
6455 unsigned tmp_move
= get_temp_index(emit
);
6456 struct tgsi_full_src_register move_src
= make_src_temp_reg(tmp_move
);
6457 struct tgsi_full_dst_register move_dst
= make_dst_temp_reg(tmp_move
);
6459 /* MOV dst.x, 1.0 */
6460 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_X
) {
6461 struct tgsi_full_dst_register dst_x
=
6462 writemask_dst(&move_dst
, TGSI_WRITEMASK_X
);
6463 struct tgsi_full_src_register one
= make_immediate_reg_float(emit
, 1.0f
);
6465 emit_instruction_op1(emit
, VGPU10_OPCODE_MOV
, &dst_x
, &one
);
6468 /* MUL dst.y, s0.y, s1.y */
6469 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_Y
) {
6470 struct tgsi_full_dst_register dst_y
=
6471 writemask_dst(&move_dst
, TGSI_WRITEMASK_Y
);
6473 emit_instruction_opn(emit
, VGPU10_OPCODE_MUL
, &dst_y
, &s0_yyyy
,
6474 &s1_yyyy
, NULL
, inst
->Instruction
.Saturate
,
6475 inst
->Instruction
.Precise
);
6478 /* MOV dst.z, s0.z */
6479 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_Z
) {
6480 struct tgsi_full_dst_register dst_z
=
6481 writemask_dst(&move_dst
, TGSI_WRITEMASK_Z
);
6483 emit_instruction_opn(emit
, VGPU10_OPCODE_MOV
,
6484 &dst_z
, &s0_zzzz
, NULL
, NULL
,
6485 inst
->Instruction
.Saturate
,
6486 inst
->Instruction
.Precise
);
6489 /* MOV dst.w, s1.w */
6490 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_W
) {
6491 struct tgsi_full_dst_register dst_w
=
6492 writemask_dst(&move_dst
, TGSI_WRITEMASK_W
);
6494 emit_instruction_opn(emit
, VGPU10_OPCODE_MOV
,
6495 &dst_w
, &s1_wwww
, NULL
, NULL
,
6496 inst
->Instruction
.Saturate
,
6497 inst
->Instruction
.Precise
);
6500 emit_instruction_op1(emit
, VGPU10_OPCODE_MOV
, &inst
->Dst
[0], &move_src
);
6501 free_temp_indexes(emit
);
6508 * A helper function to return the stream index as specified in
6509 * the immediate register
6511 static inline unsigned
6512 find_stream_index(struct svga_shader_emitter_v10
*emit
,
6513 const struct tgsi_full_src_register
*src
)
6515 return emit
->immediates
[src
->Register
.Index
][src
->Register
.SwizzleX
].Int
;
6520 * Emit code for TGSI_OPCODE_ENDPRIM (GS only)
6523 emit_endprim(struct svga_shader_emitter_v10
*emit
,
6524 const struct tgsi_full_instruction
*inst
)
6526 assert(emit
->unit
== PIPE_SHADER_GEOMETRY
);
6528 begin_emit_instruction(emit
);
6529 if (emit
->version
>= 50) {
6530 unsigned streamIndex
= find_stream_index(emit
, &inst
->Src
[0]);
6532 if (emit
->info
.num_stream_output_components
[streamIndex
] == 0) {
6534 * If there is no output for this stream, discard this instruction.
6536 emit
->discard_instruction
= TRUE
;
6539 emit_opcode(emit
, VGPU10_OPCODE_CUT_STREAM
, FALSE
);
6540 assert(inst
->Src
[0].Register
.File
== TGSI_FILE_IMMEDIATE
);
6541 emit_stream_register(emit
, streamIndex
);
6545 emit_opcode(emit
, VGPU10_OPCODE_CUT
, FALSE
);
6547 end_emit_instruction(emit
);
6553 * Emit code for TGSI_OPCODE_EX2 (2^x) instruction.
6556 emit_ex2(struct svga_shader_emitter_v10
*emit
,
6557 const struct tgsi_full_instruction
*inst
)
6559 /* Note that TGSI_OPCODE_EX2 computes only one value from src.x
6560 * while VGPU10 computes four values.
6563 * dst.xyzw = 2.0 ^ src.x
6566 struct tgsi_full_src_register src_xxxx
=
6567 swizzle_src(&inst
->Src
[0], TGSI_SWIZZLE_X
, TGSI_SWIZZLE_X
,
6568 TGSI_SWIZZLE_X
, TGSI_SWIZZLE_X
);
6570 /* EXP tmp, s0.xxxx */
6571 emit_instruction_opn(emit
, VGPU10_OPCODE_EXP
, &inst
->Dst
[0], &src_xxxx
,
6573 inst
->Instruction
.Saturate
,
6574 inst
->Instruction
.Precise
);
6581 * Emit code for TGSI_OPCODE_EXP instruction.
6584 emit_exp(struct svga_shader_emitter_v10
*emit
,
6585 const struct tgsi_full_instruction
*inst
)
6588 * dst.x = 2 ^ floor(s0.x)
6589 * dst.y = s0.x - floor(s0.x)
6594 struct tgsi_full_src_register src_xxxx
=
6595 scalar_src(&inst
->Src
[0], TGSI_SWIZZLE_X
);
6596 unsigned tmp
= get_temp_index(emit
);
6597 struct tgsi_full_src_register tmp_src
= make_src_temp_reg(tmp
);
6598 struct tgsi_full_dst_register tmp_dst
= make_dst_temp_reg(tmp
);
6601 * If dst and src are the same we need to create
6602 * a temporary for it and insert a extra move.
6604 unsigned tmp_move
= get_temp_index(emit
);
6605 struct tgsi_full_src_register move_src
= make_src_temp_reg(tmp_move
);
6606 struct tgsi_full_dst_register move_dst
= make_dst_temp_reg(tmp_move
);
6608 /* only use X component of temp reg */
6609 tmp_dst
= writemask_dst(&tmp_dst
, TGSI_WRITEMASK_X
);
6610 tmp_src
= scalar_src(&tmp_src
, TGSI_SWIZZLE_X
);
6612 /* ROUND_NI tmp.x, s0.x */
6613 emit_instruction_op1(emit
, VGPU10_OPCODE_ROUND_NI
, &tmp_dst
,
6614 &src_xxxx
); /* round to -infinity */
6616 /* EXP dst.x, tmp.x */
6617 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_X
) {
6618 struct tgsi_full_dst_register dst_x
=
6619 writemask_dst(&move_dst
, TGSI_WRITEMASK_X
);
6621 emit_instruction_opn(emit
, VGPU10_OPCODE_EXP
, &dst_x
, &tmp_src
,
6623 inst
->Instruction
.Saturate
,
6624 inst
->Instruction
.Precise
);
6627 /* ADD dst.y, s0.x, -tmp */
6628 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_Y
) {
6629 struct tgsi_full_dst_register dst_y
=
6630 writemask_dst(&move_dst
, TGSI_WRITEMASK_Y
);
6631 struct tgsi_full_src_register neg_tmp_src
= negate_src(&tmp_src
);
6633 emit_instruction_opn(emit
, VGPU10_OPCODE_ADD
, &dst_y
, &src_xxxx
,
6635 inst
->Instruction
.Saturate
,
6636 inst
->Instruction
.Precise
);
6639 /* EXP dst.z, s0.x */
6640 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_Z
) {
6641 struct tgsi_full_dst_register dst_z
=
6642 writemask_dst(&move_dst
, TGSI_WRITEMASK_Z
);
6644 emit_instruction_opn(emit
, VGPU10_OPCODE_EXP
, &dst_z
, &src_xxxx
,
6646 inst
->Instruction
.Saturate
,
6647 inst
->Instruction
.Precise
);
6650 /* MOV dst.w, 1.0 */
6651 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_W
) {
6652 struct tgsi_full_dst_register dst_w
=
6653 writemask_dst(&move_dst
, TGSI_WRITEMASK_W
);
6654 struct tgsi_full_src_register one
= make_immediate_reg_float(emit
, 1.0f
);
6656 emit_instruction_op1(emit
, VGPU10_OPCODE_MOV
, &dst_w
, &one
);
6659 emit_instruction_op1(emit
, VGPU10_OPCODE_MOV
, &inst
->Dst
[0], &move_src
);
6661 free_temp_indexes(emit
);
6668 * Emit code for TGSI_OPCODE_IF instruction.
6671 emit_if(struct svga_shader_emitter_v10
*emit
,
6672 const struct tgsi_full_src_register
*src
)
6674 VGPU10OpcodeToken0 opcode0
;
6676 /* The src register should be a scalar */
6677 assert(src
->Register
.SwizzleX
== src
->Register
.SwizzleY
&&
6678 src
->Register
.SwizzleX
== src
->Register
.SwizzleZ
&&
6679 src
->Register
.SwizzleX
== src
->Register
.SwizzleW
);
6681 /* The only special thing here is that we need to set the
6682 * VGPU10_INSTRUCTION_TEST_NONZERO flag since we want to test if
6683 * src.x is non-zero.
6686 opcode0
.opcodeType
= VGPU10_OPCODE_IF
;
6687 opcode0
.testBoolean
= VGPU10_INSTRUCTION_TEST_NONZERO
;
6689 begin_emit_instruction(emit
);
6690 emit_dword(emit
, opcode0
.value
);
6691 emit_src_register(emit
, src
);
6692 end_emit_instruction(emit
);
6699 * Emit code for TGSI_OPCODE_KILL_IF instruction (kill fragment if any of
6700 * the register components are negative).
6703 emit_kill_if(struct svga_shader_emitter_v10
*emit
,
6704 const struct tgsi_full_instruction
*inst
)
6706 unsigned tmp
= get_temp_index(emit
);
6707 struct tgsi_full_src_register tmp_src
= make_src_temp_reg(tmp
);
6708 struct tgsi_full_dst_register tmp_dst
= make_dst_temp_reg(tmp
);
6710 struct tgsi_full_src_register zero
= make_immediate_reg_float(emit
, 0.0f
);
6712 struct tgsi_full_dst_register tmp_dst_x
=
6713 writemask_dst(&tmp_dst
, TGSI_WRITEMASK_X
);
6714 struct tgsi_full_src_register tmp_src_xxxx
=
6715 scalar_src(&tmp_src
, TGSI_SWIZZLE_X
);
6717 /* tmp = src[0] < 0.0 */
6718 emit_instruction_op2(emit
, VGPU10_OPCODE_LT
, &tmp_dst
, &inst
->Src
[0], &zero
);
6720 if (!same_swizzle_terms(&inst
->Src
[0])) {
6721 /* If the swizzle is not XXXX, YYYY, ZZZZ or WWWW we need to
6722 * logically OR the swizzle terms. Most uses of KILL_IF only
6723 * test one channel so it's good to avoid these extra steps.
6725 struct tgsi_full_src_register tmp_src_yyyy
=
6726 scalar_src(&tmp_src
, TGSI_SWIZZLE_Y
);
6727 struct tgsi_full_src_register tmp_src_zzzz
=
6728 scalar_src(&tmp_src
, TGSI_SWIZZLE_Z
);
6729 struct tgsi_full_src_register tmp_src_wwww
=
6730 scalar_src(&tmp_src
, TGSI_SWIZZLE_W
);
6732 emit_instruction_op2(emit
, VGPU10_OPCODE_OR
, &tmp_dst_x
, &tmp_src_xxxx
,
6734 emit_instruction_op2(emit
, VGPU10_OPCODE_OR
, &tmp_dst_x
, &tmp_src_xxxx
,
6736 emit_instruction_op2(emit
, VGPU10_OPCODE_OR
, &tmp_dst_x
, &tmp_src_xxxx
,
6740 begin_emit_instruction(emit
);
6741 emit_discard_opcode(emit
, TRUE
); /* discard if src0.x is non-zero */
6742 emit_src_register(emit
, &tmp_src_xxxx
);
6743 end_emit_instruction(emit
);
6745 free_temp_indexes(emit
);
6752 * Emit code for TGSI_OPCODE_KILL instruction (unconditional discard).
6755 emit_kill(struct svga_shader_emitter_v10
*emit
,
6756 const struct tgsi_full_instruction
*inst
)
6758 struct tgsi_full_src_register zero
= make_immediate_reg_float(emit
, 0.0f
);
6760 /* DISCARD if 0.0 is zero */
6761 begin_emit_instruction(emit
);
6762 emit_discard_opcode(emit
, FALSE
);
6763 emit_src_register(emit
, &zero
);
6764 end_emit_instruction(emit
);
6771 * Emit code for TGSI_OPCODE_LG2 instruction.
6774 emit_lg2(struct svga_shader_emitter_v10
*emit
,
6775 const struct tgsi_full_instruction
*inst
)
6777 /* Note that TGSI_OPCODE_LG2 computes only one value from src.x
6778 * while VGPU10 computes four values.
6781 * dst.xyzw = log2(src.x)
6784 struct tgsi_full_src_register src_xxxx
=
6785 swizzle_src(&inst
->Src
[0], TGSI_SWIZZLE_X
, TGSI_SWIZZLE_X
,
6786 TGSI_SWIZZLE_X
, TGSI_SWIZZLE_X
);
6788 /* LOG tmp, s0.xxxx */
6789 emit_instruction_opn(emit
, VGPU10_OPCODE_LOG
,
6790 &inst
->Dst
[0], &src_xxxx
, NULL
, NULL
,
6791 inst
->Instruction
.Saturate
,
6792 inst
->Instruction
.Precise
);
6799 * Emit code for TGSI_OPCODE_LIT instruction.
6802 emit_lit(struct svga_shader_emitter_v10
*emit
,
6803 const struct tgsi_full_instruction
*inst
)
6805 struct tgsi_full_src_register one
= make_immediate_reg_float(emit
, 1.0f
);
6808 * If dst and src are the same we need to create
6809 * a temporary for it and insert a extra move.
6811 unsigned tmp_move
= get_temp_index(emit
);
6812 struct tgsi_full_src_register move_src
= make_src_temp_reg(tmp_move
);
6813 struct tgsi_full_dst_register move_dst
= make_dst_temp_reg(tmp_move
);
6817 * dst.y = max(src.x, 0)
6818 * dst.z = (src.x > 0) ? max(src.y, 0)^{clamp(src.w, -128, 128))} : 0
6822 /* MOV dst.x, 1.0 */
6823 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_X
) {
6824 struct tgsi_full_dst_register dst_x
=
6825 writemask_dst(&move_dst
, TGSI_WRITEMASK_X
);
6826 emit_instruction_op1(emit
, VGPU10_OPCODE_MOV
, &dst_x
, &one
);
6829 /* MOV dst.w, 1.0 */
6830 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_W
) {
6831 struct tgsi_full_dst_register dst_w
=
6832 writemask_dst(&move_dst
, TGSI_WRITEMASK_W
);
6833 emit_instruction_op1(emit
, VGPU10_OPCODE_MOV
, &dst_w
, &one
);
6836 /* MAX dst.y, src.x, 0.0 */
6837 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_Y
) {
6838 struct tgsi_full_dst_register dst_y
=
6839 writemask_dst(&move_dst
, TGSI_WRITEMASK_Y
);
6840 struct tgsi_full_src_register zero
=
6841 make_immediate_reg_float(emit
, 0.0f
);
6842 struct tgsi_full_src_register src_xxxx
=
6843 swizzle_src(&inst
->Src
[0], TGSI_SWIZZLE_X
, TGSI_SWIZZLE_X
,
6844 TGSI_SWIZZLE_X
, TGSI_SWIZZLE_X
);
6846 emit_instruction_opn(emit
, VGPU10_OPCODE_MAX
, &dst_y
, &src_xxxx
,
6847 &zero
, NULL
, inst
->Instruction
.Saturate
, FALSE
);
6851 * tmp1 = clamp(src.w, -128, 128);
6852 * MAX tmp1, src.w, -128
6853 * MIN tmp1, tmp1, 128
6855 * tmp2 = max(tmp2, 0);
6856 * MAX tmp2, src.y, 0
6858 * tmp1 = pow(tmp2, tmp1);
6860 * MUL tmp1, tmp2, tmp1
6863 * tmp1 = (src.w == 0) ? 1 : tmp1;
6865 * MOVC tmp1, tmp2, 1.0, tmp1
6867 * dst.z = (0 < src.x) ? tmp1 : 0;
6869 * MOVC dst.z, tmp2, tmp1, 0.0
6871 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_Z
) {
6872 struct tgsi_full_dst_register dst_z
=
6873 writemask_dst(&move_dst
, TGSI_WRITEMASK_Z
);
6875 unsigned tmp1
= get_temp_index(emit
);
6876 struct tgsi_full_src_register tmp1_src
= make_src_temp_reg(tmp1
);
6877 struct tgsi_full_dst_register tmp1_dst
= make_dst_temp_reg(tmp1
);
6878 unsigned tmp2
= get_temp_index(emit
);
6879 struct tgsi_full_src_register tmp2_src
= make_src_temp_reg(tmp2
);
6880 struct tgsi_full_dst_register tmp2_dst
= make_dst_temp_reg(tmp2
);
6882 struct tgsi_full_src_register src_xxxx
=
6883 scalar_src(&inst
->Src
[0], TGSI_SWIZZLE_X
);
6884 struct tgsi_full_src_register src_yyyy
=
6885 scalar_src(&inst
->Src
[0], TGSI_SWIZZLE_Y
);
6886 struct tgsi_full_src_register src_wwww
=
6887 scalar_src(&inst
->Src
[0], TGSI_SWIZZLE_W
);
6889 struct tgsi_full_src_register zero
=
6890 make_immediate_reg_float(emit
, 0.0f
);
6891 struct tgsi_full_src_register lowerbound
=
6892 make_immediate_reg_float(emit
, -128.0f
);
6893 struct tgsi_full_src_register upperbound
=
6894 make_immediate_reg_float(emit
, 128.0f
);
6896 emit_instruction_op2(emit
, VGPU10_OPCODE_MAX
, &tmp1_dst
, &src_wwww
,
6898 emit_instruction_op2(emit
, VGPU10_OPCODE_MIN
, &tmp1_dst
, &tmp1_src
,
6900 emit_instruction_op2(emit
, VGPU10_OPCODE_MAX
, &tmp2_dst
, &src_yyyy
,
6903 /* POW tmp1, tmp2, tmp1 */
6904 /* LOG tmp2, tmp2 */
6905 emit_instruction_op1(emit
, VGPU10_OPCODE_LOG
, &tmp2_dst
, &tmp2_src
);
6907 /* MUL tmp1, tmp2, tmp1 */
6908 emit_instruction_op2(emit
, VGPU10_OPCODE_MUL
, &tmp1_dst
, &tmp2_src
,
6911 /* EXP tmp1, tmp1 */
6912 emit_instruction_op1(emit
, VGPU10_OPCODE_EXP
, &tmp1_dst
, &tmp1_src
);
6914 /* EQ tmp2, 0, src.w */
6915 emit_instruction_op2(emit
, VGPU10_OPCODE_EQ
, &tmp2_dst
, &zero
, &src_wwww
);
6916 /* MOVC tmp1.z, tmp2, tmp1, 1.0 */
6917 emit_instruction_op3(emit
, VGPU10_OPCODE_MOVC
, &tmp1_dst
,
6918 &tmp2_src
, &one
, &tmp1_src
);
6920 /* LT tmp2, 0, src.x */
6921 emit_instruction_op2(emit
, VGPU10_OPCODE_LT
, &tmp2_dst
, &zero
, &src_xxxx
);
6922 /* MOVC dst.z, tmp2, tmp1, 0.0 */
6923 emit_instruction_op3(emit
, VGPU10_OPCODE_MOVC
, &dst_z
,
6924 &tmp2_src
, &tmp1_src
, &zero
);
6927 emit_instruction_op1(emit
, VGPU10_OPCODE_MOV
, &inst
->Dst
[0], &move_src
);
6928 free_temp_indexes(emit
);
6935 * Emit Level Of Detail Query (LODQ) instruction.
6938 emit_lodq(struct svga_shader_emitter_v10
*emit
,
6939 const struct tgsi_full_instruction
*inst
)
6941 const uint unit
= inst
->Src
[1].Register
.Index
;
6943 assert(emit
->version
>= 41);
6945 /* LOD dst, coord, resource, sampler */
6946 begin_emit_instruction(emit
);
6947 emit_opcode(emit
, VGPU10_OPCODE_LOD
, FALSE
);
6948 emit_dst_register(emit
, &inst
->Dst
[0]);
6949 emit_src_register(emit
, &inst
->Src
[0]); /* coord */
6950 emit_resource_register(emit
, unit
);
6951 emit_sampler_register(emit
, unit
);
6952 end_emit_instruction(emit
);
6959 * Emit code for TGSI_OPCODE_LOG instruction.
6962 emit_log(struct svga_shader_emitter_v10
*emit
,
6963 const struct tgsi_full_instruction
*inst
)
6966 * dst.x = floor(lg2(abs(s0.x)))
6967 * dst.y = abs(s0.x) / (2 ^ floor(lg2(abs(s0.x))))
6968 * dst.z = lg2(abs(s0.x))
6972 struct tgsi_full_src_register src_xxxx
=
6973 scalar_src(&inst
->Src
[0], TGSI_SWIZZLE_X
);
6974 unsigned tmp
= get_temp_index(emit
);
6975 struct tgsi_full_src_register tmp_src
= make_src_temp_reg(tmp
);
6976 struct tgsi_full_dst_register tmp_dst
= make_dst_temp_reg(tmp
);
6977 struct tgsi_full_src_register abs_src_xxxx
= absolute_src(&src_xxxx
);
6979 /* only use X component of temp reg */
6980 tmp_dst
= writemask_dst(&tmp_dst
, TGSI_WRITEMASK_X
);
6981 tmp_src
= scalar_src(&tmp_src
, TGSI_SWIZZLE_X
);
6983 /* LOG tmp.x, abs(s0.x) */
6984 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_XYZ
) {
6985 emit_instruction_op1(emit
, VGPU10_OPCODE_LOG
, &tmp_dst
, &abs_src_xxxx
);
6988 /* MOV dst.z, tmp.x */
6989 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_Z
) {
6990 struct tgsi_full_dst_register dst_z
=
6991 writemask_dst(&inst
->Dst
[0], TGSI_WRITEMASK_Z
);
6993 emit_instruction_opn(emit
, VGPU10_OPCODE_MOV
,
6994 &dst_z
, &tmp_src
, NULL
, NULL
,
6995 inst
->Instruction
.Saturate
, FALSE
);
6998 /* FLR tmp.x, tmp.x */
6999 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_XY
) {
7000 emit_instruction_op1(emit
, VGPU10_OPCODE_ROUND_NI
, &tmp_dst
, &tmp_src
);
7003 /* MOV dst.x, tmp.x */
7004 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_X
) {
7005 struct tgsi_full_dst_register dst_x
=
7006 writemask_dst(&inst
->Dst
[0], TGSI_WRITEMASK_X
);
7008 emit_instruction_opn(emit
, VGPU10_OPCODE_MOV
,
7009 &dst_x
, &tmp_src
, NULL
, NULL
,
7010 inst
->Instruction
.Saturate
, FALSE
);
7013 /* EXP tmp.x, tmp.x */
7014 /* DIV dst.y, abs(s0.x), tmp.x */
7015 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_Y
) {
7016 struct tgsi_full_dst_register dst_y
=
7017 writemask_dst(&inst
->Dst
[0], TGSI_WRITEMASK_Y
);
7019 emit_instruction_op1(emit
, VGPU10_OPCODE_EXP
, &tmp_dst
, &tmp_src
);
7020 emit_instruction_opn(emit
, VGPU10_OPCODE_DIV
, &dst_y
, &abs_src_xxxx
,
7021 &tmp_src
, NULL
, inst
->Instruction
.Saturate
, FALSE
);
7024 /* MOV dst.w, 1.0 */
7025 if (inst
->Dst
[0].Register
.WriteMask
& TGSI_WRITEMASK_W
) {
7026 struct tgsi_full_dst_register dst_w
=
7027 writemask_dst(&inst
->Dst
[0], TGSI_WRITEMASK_W
);
7028 struct tgsi_full_src_register one
=
7029 make_immediate_reg_float(emit
, 1.0f
);
7031 emit_instruction_op1(emit
, VGPU10_OPCODE_MOV
, &dst_w
, &one
);
7034 free_temp_indexes(emit
);
7041 * Emit code for TGSI_OPCODE_LRP instruction.
7044 emit_lrp(struct svga_shader_emitter_v10
*emit
,
7045 const struct tgsi_full_instruction
*inst
)
7047 /* dst = LRP(s0, s1, s2):
7048 * dst = s0 * (s1 - s2) + s2
7050 * SUB tmp, s1, s2; tmp = s1 - s2
7051 * MAD dst, s0, tmp, s2; dst = s0 * t1 + s2
7053 unsigned tmp
= get_temp_index(emit
);
7054 struct tgsi_full_src_register src_tmp
= make_src_temp_reg(tmp
);
7055 struct tgsi_full_dst_register dst_tmp
= make_dst_temp_reg(tmp
);
7056 struct tgsi_full_src_register neg_src2
= negate_src(&inst
->Src
[2]);
7058 /* ADD tmp, s1, -s2 */
7059 emit_instruction_opn(emit
, VGPU10_OPCODE_ADD
, &dst_tmp
,
7060 &inst
->Src
[1], &neg_src2
, NULL
, FALSE
,
7061 inst
->Instruction
.Precise
);
7063 /* MAD dst, s1, tmp, s3 */
7064 emit_instruction_opn(emit
, VGPU10_OPCODE_MAD
, &inst
->Dst
[0],
7065 &inst
->Src
[0], &src_tmp
, &inst
->Src
[2],
7066 inst
->Instruction
.Saturate
,
7067 inst
->Instruction
.Precise
);
7069 free_temp_indexes(emit
);
7076 * Emit code for TGSI_OPCODE_POW instruction.
7079 emit_pow(struct svga_shader_emitter_v10
*emit
,
7080 const struct tgsi_full_instruction
*inst
)
7082 /* Note that TGSI_OPCODE_POW computes only one value from src0.x and
7083 * src1.x while VGPU10 computes four values.
7085 * dst = POW(src0, src1):
7086 * dst.xyzw = src0.x ^ src1.x
7088 unsigned tmp
= get_temp_index(emit
);
7089 struct tgsi_full_src_register tmp_src
= make_src_temp_reg(tmp
);
7090 struct tgsi_full_dst_register tmp_dst
= make_dst_temp_reg(tmp
);
7091 struct tgsi_full_src_register src0_xxxx
=
7092 swizzle_src(&inst
->Src
[0], TGSI_SWIZZLE_X
, TGSI_SWIZZLE_X
,
7093 TGSI_SWIZZLE_X
, TGSI_SWIZZLE_X
);
7094 struct tgsi_full_src_register src1_xxxx
=
7095 swizzle_src(&inst
->Src
[1], TGSI_SWIZZLE_X
, TGSI_SWIZZLE_X
,
7096 TGSI_SWIZZLE_X
, TGSI_SWIZZLE_X
);
7098 /* LOG tmp, s0.xxxx */
7099 emit_instruction_opn(emit
, VGPU10_OPCODE_LOG
,
7100 &tmp_dst
, &src0_xxxx
, NULL
, NULL
,
7101 FALSE
, inst
->Instruction
.Precise
);
7103 /* MUL tmp, tmp, s1.xxxx */
7104 emit_instruction_opn(emit
, VGPU10_OPCODE_MUL
,
7105 &tmp_dst
, &tmp_src
, &src1_xxxx
, NULL
,
7106 FALSE
, inst
->Instruction
.Precise
);
7108 /* EXP tmp, s0.xxxx */
7109 emit_instruction_opn(emit
, VGPU10_OPCODE_EXP
,
7110 &inst
->Dst
[0], &tmp_src
, NULL
, NULL
,
7111 inst
->Instruction
.Saturate
,
7112 inst
->Instruction
.Precise
);
7115 free_temp_indexes(emit
);
7122 * Emit code for TGSI_OPCODE_RCP (reciprocal) instruction.
7125 emit_rcp(struct svga_shader_emitter_v10
*emit
,
7126 const struct tgsi_full_instruction
*inst
)
7128 if (emit
->version
>= 50) {
7129 /* use new RCP instruction. But VGPU10_OPCODE_RCP is component-wise
7130 * while TGSI_OPCODE_RCP computes dst.xyzw = 1.0 / src.xxxx so we need
7131 * to manipulate the src register's swizzle.
7133 struct tgsi_full_src_register src
= inst
->Src
[0];
7134 src
.Register
.SwizzleY
=
7135 src
.Register
.SwizzleZ
=
7136 src
.Register
.SwizzleW
= src
.Register
.SwizzleX
;
7138 begin_emit_instruction(emit
);
7139 emit_opcode_precise(emit
, VGPU10_OPCODE_RCP
,
7140 inst
->Instruction
.Saturate
,
7141 inst
->Instruction
.Precise
);
7142 emit_dst_register(emit
, &inst
->Dst
[0]);
7143 emit_src_register(emit
, &src
);
7144 end_emit_instruction(emit
);
7147 struct tgsi_full_src_register one
= make_immediate_reg_float(emit
, 1.0f
);
7149 unsigned tmp
= get_temp_index(emit
);
7150 struct tgsi_full_src_register tmp_src
= make_src_temp_reg(tmp
);
7151 struct tgsi_full_dst_register tmp_dst
= make_dst_temp_reg(tmp
);
7153 struct tgsi_full_dst_register tmp_dst_x
=
7154 writemask_dst(&tmp_dst
, TGSI_WRITEMASK_X
);
7155 struct tgsi_full_src_register tmp_src_xxxx
=
7156 scalar_src(&tmp_src
, TGSI_SWIZZLE_X
);
7158 /* DIV tmp.x, 1.0, s0 */
7159 emit_instruction_opn(emit
, VGPU10_OPCODE_DIV
,
7160 &tmp_dst_x
, &one
, &inst
->Src
[0], NULL
,
7161 FALSE
, inst
->Instruction
.Precise
);
7163 /* MOV dst, tmp.xxxx */
7164 emit_instruction_opn(emit
, VGPU10_OPCODE_MOV
,
7165 &inst
->Dst
[0], &tmp_src_xxxx
, NULL
, NULL
,
7166 inst
->Instruction
.Saturate
,
7167 inst
->Instruction
.Precise
);
7169 free_temp_indexes(emit
);
7177 * Emit code for TGSI_OPCODE_RSQ instruction.
7180 emit_rsq(struct svga_shader_emitter_v10
*emit
,
7181 const struct tgsi_full_instruction
*inst
)
7184 * dst.xyzw = 1 / sqrt(src.x)
7190 unsigned tmp
= get_temp_index(emit
);
7191 struct tgsi_full_src_register tmp_src
= make_src_temp_reg(tmp
);
7192 struct tgsi_full_dst_register tmp_dst
= make_dst_temp_reg(tmp
);
7194 struct tgsi_full_dst_register tmp_dst_x
=
7195 writemask_dst(&tmp_dst
, TGSI_WRITEMASK_X
);
7196 struct tgsi_full_src_register tmp_src_xxxx
=
7197 scalar_src(&tmp_src
, TGSI_SWIZZLE_X
);
7199 /* RSQ tmp, src.x */
7200 emit_instruction_opn(emit
, VGPU10_OPCODE_RSQ
,
7201 &tmp_dst_x
, &inst
->Src
[0], NULL
, NULL
,
7202 FALSE
, inst
->Instruction
.Precise
);
7204 /* MOV dst, tmp.xxxx */
7205 emit_instruction_opn(emit
, VGPU10_OPCODE_MOV
,
7206 &inst
->Dst
[0], &tmp_src_xxxx
, NULL
, NULL
,
7207 inst
->Instruction
.Saturate
,
7208 inst
->Instruction
.Precise
);
7211 free_temp_indexes(emit
);
7218 * Emit code for TGSI_OPCODE_SEQ (Set Equal) instruction.
7221 emit_seq(struct svga_shader_emitter_v10
*emit
,
7222 const struct tgsi_full_instruction
*inst
)
7224 /* dst = SEQ(s0, s1):
7225 * dst = s0 == s1 ? 1.0 : 0.0 (per component)
7227 * EQ tmp, s0, s1; tmp = s0 == s1 : 0xffffffff : 0 (per comp)
7228 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component)
7230 unsigned tmp
= get_temp_index(emit
);
7231 struct tgsi_full_src_register tmp_src
= make_src_temp_reg(tmp
);
7232 struct tgsi_full_dst_register tmp_dst
= make_dst_temp_reg(tmp
);
7233 struct tgsi_full_src_register zero
= make_immediate_reg_float(emit
, 0.0f
);
7234 struct tgsi_full_src_register one
= make_immediate_reg_float(emit
, 1.0f
);
7236 /* EQ tmp, s0, s1 */
7237 emit_instruction_op2(emit
, VGPU10_OPCODE_EQ
, &tmp_dst
, &inst
->Src
[0],
7240 /* MOVC dst, tmp, one, zero */
7241 emit_instruction_op3(emit
, VGPU10_OPCODE_MOVC
, &inst
->Dst
[0], &tmp_src
,
7244 free_temp_indexes(emit
);
7251 * Emit code for TGSI_OPCODE_SGE (Set Greater than or Equal) instruction.
7254 emit_sge(struct svga_shader_emitter_v10
*emit
,
7255 const struct tgsi_full_instruction
*inst
)
7257 /* dst = SGE(s0, s1):
7258 * dst = s0 >= s1 ? 1.0 : 0.0 (per component)
7260 * GE tmp, s0, s1; tmp = s0 >= s1 : 0xffffffff : 0 (per comp)
7261 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component)
7263 unsigned tmp
= get_temp_index(emit
);
7264 struct tgsi_full_src_register tmp_src
= make_src_temp_reg(tmp
);
7265 struct tgsi_full_dst_register tmp_dst
= make_dst_temp_reg(tmp
);
7266 struct tgsi_full_src_register zero
= make_immediate_reg_float(emit
, 0.0f
);
7267 struct tgsi_full_src_register one
= make_immediate_reg_float(emit
, 1.0f
);
7269 /* GE tmp, s0, s1 */
7270 emit_instruction_op2(emit
, VGPU10_OPCODE_GE
, &tmp_dst
, &inst
->Src
[0],
7273 /* MOVC dst, tmp, one, zero */
7274 emit_instruction_op3(emit
, VGPU10_OPCODE_MOVC
, &inst
->Dst
[0], &tmp_src
,
7277 free_temp_indexes(emit
);
7284 * Emit code for TGSI_OPCODE_SGT (Set Greater than) instruction.
7287 emit_sgt(struct svga_shader_emitter_v10
*emit
,
7288 const struct tgsi_full_instruction
*inst
)
7290 /* dst = SGT(s0, s1):
7291 * dst = s0 > s1 ? 1.0 : 0.0 (per component)
7293 * LT tmp, s1, s0; tmp = s1 < s0 ? 0xffffffff : 0 (per comp)
7294 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component)
7296 unsigned tmp
= get_temp_index(emit
);
7297 struct tgsi_full_src_register tmp_src
= make_src_temp_reg(tmp
);
7298 struct tgsi_full_dst_register tmp_dst
= make_dst_temp_reg(tmp
);
7299 struct tgsi_full_src_register zero
= make_immediate_reg_float(emit
, 0.0f
);
7300 struct tgsi_full_src_register one
= make_immediate_reg_float(emit
, 1.0f
);
7302 /* LT tmp, s1, s0 */
7303 emit_instruction_op2(emit
, VGPU10_OPCODE_LT
, &tmp_dst
, &inst
->Src
[1],
7306 /* MOVC dst, tmp, one, zero */
7307 emit_instruction_op3(emit
, VGPU10_OPCODE_MOVC
, &inst
->Dst
[0], &tmp_src
,
7310 free_temp_indexes(emit
);
7317 * Emit code for TGSI_OPCODE_SIN and TGSI_OPCODE_COS instructions.
7320 emit_sincos(struct svga_shader_emitter_v10
*emit
,
7321 const struct tgsi_full_instruction
*inst
)
7323 unsigned tmp
= get_temp_index(emit
);
7324 struct tgsi_full_src_register tmp_src
= make_src_temp_reg(tmp
);
7325 struct tgsi_full_dst_register tmp_dst
= make_dst_temp_reg(tmp
);
7327 struct tgsi_full_src_register tmp_src_xxxx
=
7328 scalar_src(&tmp_src
, TGSI_SWIZZLE_X
);
7329 struct tgsi_full_dst_register tmp_dst_x
=
7330 writemask_dst(&tmp_dst
, TGSI_WRITEMASK_X
);
7332 begin_emit_instruction(emit
);
7333 emit_opcode(emit
, VGPU10_OPCODE_SINCOS
, FALSE
);
7335 if(inst
->Instruction
.Opcode
== TGSI_OPCODE_SIN
)
7337 emit_dst_register(emit
, &tmp_dst_x
); /* first destination register */
7338 emit_null_dst_register(emit
); /* second destination register */
7341 emit_null_dst_register(emit
);
7342 emit_dst_register(emit
, &tmp_dst_x
);
7345 emit_src_register(emit
, &inst
->Src
[0]);
7346 end_emit_instruction(emit
);
7348 emit_instruction_opn(emit
, VGPU10_OPCODE_MOV
,
7349 &inst
->Dst
[0], &tmp_src_xxxx
, NULL
, NULL
,
7350 inst
->Instruction
.Saturate
,
7351 inst
->Instruction
.Precise
);
7353 free_temp_indexes(emit
);
7360 * Emit code for TGSI_OPCODE_SLE (Set Less than or Equal) instruction.
7363 emit_sle(struct svga_shader_emitter_v10
*emit
,
7364 const struct tgsi_full_instruction
*inst
)
7366 /* dst = SLE(s0, s1):
7367 * dst = s0 <= s1 ? 1.0 : 0.0 (per component)
7369 * GE tmp, s1, s0; tmp = s1 >= s0 : 0xffffffff : 0 (per comp)
7370 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component)
7372 unsigned tmp
= get_temp_index(emit
);
7373 struct tgsi_full_src_register tmp_src
= make_src_temp_reg(tmp
);
7374 struct tgsi_full_dst_register tmp_dst
= make_dst_temp_reg(tmp
);
7375 struct tgsi_full_src_register zero
= make_immediate_reg_float(emit
, 0.0f
);
7376 struct tgsi_full_src_register one
= make_immediate_reg_float(emit
, 1.0f
);
7378 /* GE tmp, s1, s0 */
7379 emit_instruction_op2(emit
, VGPU10_OPCODE_GE
, &tmp_dst
, &inst
->Src
[1],
7382 /* MOVC dst, tmp, one, zero */
7383 emit_instruction_op3(emit
, VGPU10_OPCODE_MOVC
, &inst
->Dst
[0], &tmp_src
,
7386 free_temp_indexes(emit
);
7393 * Emit code for TGSI_OPCODE_SLT (Set Less than) instruction.
7396 emit_slt(struct svga_shader_emitter_v10
*emit
,
7397 const struct tgsi_full_instruction
*inst
)
7399 /* dst = SLT(s0, s1):
7400 * dst = s0 < s1 ? 1.0 : 0.0 (per component)
7402 * LT tmp, s0, s1; tmp = s0 < s1 ? 0xffffffff : 0 (per comp)
7403 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component)
7405 unsigned tmp
= get_temp_index(emit
);
7406 struct tgsi_full_src_register tmp_src
= make_src_temp_reg(tmp
);
7407 struct tgsi_full_dst_register tmp_dst
= make_dst_temp_reg(tmp
);
7408 struct tgsi_full_src_register zero
= make_immediate_reg_float(emit
, 0.0f
);
7409 struct tgsi_full_src_register one
= make_immediate_reg_float(emit
, 1.0f
);
7411 /* LT tmp, s0, s1 */
7412 emit_instruction_op2(emit
, VGPU10_OPCODE_LT
, &tmp_dst
, &inst
->Src
[0],
7415 /* MOVC dst, tmp, one, zero */
7416 emit_instruction_op3(emit
, VGPU10_OPCODE_MOVC
, &inst
->Dst
[0], &tmp_src
,
7419 free_temp_indexes(emit
);
7426 * Emit code for TGSI_OPCODE_SNE (Set Not Equal) instruction.
7429 emit_sne(struct svga_shader_emitter_v10
*emit
,
7430 const struct tgsi_full_instruction
*inst
)
7432 /* dst = SNE(s0, s1):
7433 * dst = s0 != s1 ? 1.0 : 0.0 (per component)
7435 * EQ tmp, s0, s1; tmp = s0 == s1 : 0xffffffff : 0 (per comp)
7436 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component)
7438 unsigned tmp
= get_temp_index(emit
);
7439 struct tgsi_full_src_register tmp_src
= make_src_temp_reg(tmp
);
7440 struct tgsi_full_dst_register tmp_dst
= make_dst_temp_reg(tmp
);
7441 struct tgsi_full_src_register zero
= make_immediate_reg_float(emit
, 0.0f
);
7442 struct tgsi_full_src_register one
= make_immediate_reg_float(emit
, 1.0f
);
7444 /* NE tmp, s0, s1 */
7445 emit_instruction_op2(emit
, VGPU10_OPCODE_NE
, &tmp_dst
, &inst
->Src
[0],
7448 /* MOVC dst, tmp, one, zero */
7449 emit_instruction_op3(emit
, VGPU10_OPCODE_MOVC
, &inst
->Dst
[0], &tmp_src
,
7452 free_temp_indexes(emit
);
7459 * Emit code for TGSI_OPCODE_SSG (Set Sign) instruction.
7462 emit_ssg(struct svga_shader_emitter_v10
*emit
,
7463 const struct tgsi_full_instruction
*inst
)
7465 /* dst.x = (src.x > 0.0) ? 1.0 : (src.x < 0.0) ? -1.0 : 0.0
7466 * dst.y = (src.y > 0.0) ? 1.0 : (src.y < 0.0) ? -1.0 : 0.0
7467 * dst.z = (src.z > 0.0) ? 1.0 : (src.z < 0.0) ? -1.0 : 0.0
7468 * dst.w = (src.w > 0.0) ? 1.0 : (src.w < 0.0) ? -1.0 : 0.0
7470 * LT tmp1, src, zero; tmp1 = src < zero ? 0xffffffff : 0 (per comp)
7471 * MOVC tmp2, tmp1, -1.0, 0.0; tmp2 = tmp1 ? -1.0 : 0.0 (per component)
7472 * LT tmp1, zero, src; tmp1 = zero < src ? 0xffffffff : 0 (per comp)
7473 * MOVC dst, tmp1, 1.0, tmp2; dst = tmp1 ? 1.0 : tmp2 (per component)
7475 struct tgsi_full_src_register zero
=
7476 make_immediate_reg_float(emit
, 0.0f
);
7477 struct tgsi_full_src_register one
=
7478 make_immediate_reg_float(emit
, 1.0f
);
7479 struct tgsi_full_src_register neg_one
=
7480 make_immediate_reg_float(emit
, -1.0f
);
7482 unsigned tmp1
= get_temp_index(emit
);
7483 struct tgsi_full_src_register tmp1_src
= make_src_temp_reg(tmp1
);
7484 struct tgsi_full_dst_register tmp1_dst
= make_dst_temp_reg(tmp1
);
7486 unsigned tmp2
= get_temp_index(emit
);
7487 struct tgsi_full_src_register tmp2_src
= make_src_temp_reg(tmp2
);
7488 struct tgsi_full_dst_register tmp2_dst
= make_dst_temp_reg(tmp2
);
7490 emit_instruction_op2(emit
, VGPU10_OPCODE_LT
, &tmp1_dst
, &inst
->Src
[0],
7492 emit_instruction_op3(emit
, VGPU10_OPCODE_MOVC
, &tmp2_dst
, &tmp1_src
,
7494 emit_instruction_op2(emit
, VGPU10_OPCODE_LT
, &tmp1_dst
, &zero
,
7496 emit_instruction_op3(emit
, VGPU10_OPCODE_MOVC
, &inst
->Dst
[0], &tmp1_src
,
7499 free_temp_indexes(emit
);
7506 * Emit code for TGSI_OPCODE_ISSG (Integer Set Sign) instruction.
7509 emit_issg(struct svga_shader_emitter_v10
*emit
,
7510 const struct tgsi_full_instruction
*inst
)
7512 /* dst.x = (src.x > 0) ? 1 : (src.x < 0) ? -1 : 0
7513 * dst.y = (src.y > 0) ? 1 : (src.y < 0) ? -1 : 0
7514 * dst.z = (src.z > 0) ? 1 : (src.z < 0) ? -1 : 0
7515 * dst.w = (src.w > 0) ? 1 : (src.w < 0) ? -1 : 0
7517 * ILT tmp1, src, 0 tmp1 = src < 0 ? -1 : 0 (per component)
7518 * ILT tmp2, 0, src tmp2 = 0 < src ? -1 : 0 (per component)
7519 * IADD dst, tmp1, neg(tmp2) dst = tmp1 - tmp2 (per component)
7521 struct tgsi_full_src_register zero
= make_immediate_reg_float(emit
, 0.0f
);
7523 unsigned tmp1
= get_temp_index(emit
);
7524 struct tgsi_full_src_register tmp1_src
= make_src_temp_reg(tmp1
);
7525 struct tgsi_full_dst_register tmp1_dst
= make_dst_temp_reg(tmp1
);
7527 unsigned tmp2
= get_temp_index(emit
);
7528 struct tgsi_full_src_register tmp2_src
= make_src_temp_reg(tmp2
);
7529 struct tgsi_full_dst_register tmp2_dst
= make_dst_temp_reg(tmp2
);
7531 struct tgsi_full_src_register neg_tmp2
= negate_src(&tmp2_src
);
7533 emit_instruction_op2(emit
, VGPU10_OPCODE_ILT
, &tmp1_dst
,
7534 &inst
->Src
[0], &zero
);
7535 emit_instruction_op2(emit
, VGPU10_OPCODE_ILT
, &tmp2_dst
,
7536 &zero
, &inst
->Src
[0]);
7537 emit_instruction_op2(emit
, VGPU10_OPCODE_IADD
, &inst
->Dst
[0],
7538 &tmp1_src
, &neg_tmp2
);
7540 free_temp_indexes(emit
);
7547 * Emit a comparison instruction. The dest register will get
7548 * 0 or ~0 values depending on the outcome of comparing src0 to src1.
7551 emit_comparison(struct svga_shader_emitter_v10
*emit
,
7553 const struct tgsi_full_dst_register
*dst
,
7554 const struct tgsi_full_src_register
*src0
,
7555 const struct tgsi_full_src_register
*src1
)
7557 struct tgsi_full_src_register immediate
;
7558 VGPU10OpcodeToken0 opcode0
;
7559 boolean swapSrc
= FALSE
;
7561 /* Sanity checks for svga vs. gallium enums */
7562 STATIC_ASSERT(SVGA3D_CMP_LESS
== (PIPE_FUNC_LESS
+ 1));
7563 STATIC_ASSERT(SVGA3D_CMP_GREATEREQUAL
== (PIPE_FUNC_GEQUAL
+ 1));
7568 case SVGA3D_CMP_NEVER
:
7569 immediate
= make_immediate_reg_int(emit
, 0);
7571 begin_emit_instruction(emit
);
7572 emit_dword(emit
, VGPU10_OPCODE_MOV
);
7573 emit_dst_register(emit
, dst
);
7574 emit_src_register(emit
, &immediate
);
7575 end_emit_instruction(emit
);
7577 case SVGA3D_CMP_ALWAYS
:
7578 immediate
= make_immediate_reg_int(emit
, -1);
7580 begin_emit_instruction(emit
);
7581 emit_dword(emit
, VGPU10_OPCODE_MOV
);
7582 emit_dst_register(emit
, dst
);
7583 emit_src_register(emit
, &immediate
);
7584 end_emit_instruction(emit
);
7586 case SVGA3D_CMP_LESS
:
7587 opcode0
.opcodeType
= VGPU10_OPCODE_LT
;
7589 case SVGA3D_CMP_EQUAL
:
7590 opcode0
.opcodeType
= VGPU10_OPCODE_EQ
;
7592 case SVGA3D_CMP_LESSEQUAL
:
7593 opcode0
.opcodeType
= VGPU10_OPCODE_GE
;
7596 case SVGA3D_CMP_GREATER
:
7597 opcode0
.opcodeType
= VGPU10_OPCODE_LT
;
7600 case SVGA3D_CMP_NOTEQUAL
:
7601 opcode0
.opcodeType
= VGPU10_OPCODE_NE
;
7603 case SVGA3D_CMP_GREATEREQUAL
:
7604 opcode0
.opcodeType
= VGPU10_OPCODE_GE
;
7607 assert(!"Unexpected comparison mode");
7608 opcode0
.opcodeType
= VGPU10_OPCODE_EQ
;
7611 begin_emit_instruction(emit
);
7612 emit_dword(emit
, opcode0
.value
);
7613 emit_dst_register(emit
, dst
);
7615 emit_src_register(emit
, src1
);
7616 emit_src_register(emit
, src0
);
7619 emit_src_register(emit
, src0
);
7620 emit_src_register(emit
, src1
);
7622 end_emit_instruction(emit
);
7627 * Get texel/address offsets for a texture instruction.
7630 get_texel_offsets(const struct svga_shader_emitter_v10
*emit
,
7631 const struct tgsi_full_instruction
*inst
, int offsets
[3])
7633 if (inst
->Texture
.NumOffsets
== 1) {
7634 /* According to OpenGL Shader Language spec the offsets are only
7635 * fetched from a previously-declared immediate/literal.
7637 const struct tgsi_texture_offset
*off
= inst
->TexOffsets
;
7638 const unsigned index
= off
[0].Index
;
7639 const unsigned swizzleX
= off
[0].SwizzleX
;
7640 const unsigned swizzleY
= off
[0].SwizzleY
;
7641 const unsigned swizzleZ
= off
[0].SwizzleZ
;
7642 const union tgsi_immediate_data
*imm
= emit
->immediates
[index
];
7644 assert(inst
->TexOffsets
[0].File
== TGSI_FILE_IMMEDIATE
);
7646 offsets
[0] = imm
[swizzleX
].Int
;
7647 offsets
[1] = imm
[swizzleY
].Int
;
7648 offsets
[2] = imm
[swizzleZ
].Int
;
7651 offsets
[0] = offsets
[1] = offsets
[2] = 0;
7657 * Set up the coordinate register for texture sampling.
7658 * When we're sampling from a RECT texture we have to scale the
7659 * unnormalized coordinate to a normalized coordinate.
7660 * We do that by multiplying the coordinate by an "extra" constant.
7661 * An alternative would be to use the RESINFO instruction to query the
7664 static struct tgsi_full_src_register
7665 setup_texcoord(struct svga_shader_emitter_v10
*emit
,
7667 const struct tgsi_full_src_register
*coord
)
7669 if (emit
->sampler_view
[unit
] && emit
->key
.tex
[unit
].unnormalized
) {
7670 unsigned scale_index
= emit
->texcoord_scale_index
[unit
];
7671 unsigned tmp
= get_temp_index(emit
);
7672 struct tgsi_full_src_register tmp_src
= make_src_temp_reg(tmp
);
7673 struct tgsi_full_dst_register tmp_dst
= make_dst_temp_reg(tmp
);
7674 struct tgsi_full_src_register scale_src
= make_src_const_reg(scale_index
);
7676 if (emit
->key
.tex
[unit
].texel_bias
) {
7677 /* to fix texture coordinate rounding issue, 0.0001 offset is
7678 * been added. This fixes piglit test fbo-blit-scaled-linear. */
7679 struct tgsi_full_src_register offset
=
7680 make_immediate_reg_float(emit
, 0.0001f
);
7682 /* ADD tmp, coord, offset */
7683 emit_instruction_op2(emit
, VGPU10_OPCODE_ADD
, &tmp_dst
,
7685 /* MUL tmp, tmp, scale */
7686 emit_instruction_op2(emit
, VGPU10_OPCODE_MUL
, &tmp_dst
,
7687 &tmp_src
, &scale_src
);
7690 /* MUL tmp, coord, const[] */
7691 emit_instruction_op2(emit
, VGPU10_OPCODE_MUL
, &tmp_dst
,
7697 /* use texcoord as-is */
7704 * For SAMPLE_C instructions, emit the extra src register which indicates
7705 * the reference/comparision value.
7708 emit_tex_compare_refcoord(struct svga_shader_emitter_v10
*emit
,
7709 enum tgsi_texture_type target
,
7710 const struct tgsi_full_src_register
*coord
)
7712 struct tgsi_full_src_register coord_src_ref
;
7715 assert(tgsi_is_shadow_target(target
));
7717 component
= tgsi_util_get_shadow_ref_src_index(target
) % 4;
7718 assert(component
>= 0);
7720 coord_src_ref
= scalar_src(coord
, component
);
7722 emit_src_register(emit
, &coord_src_ref
);
7727 * Info for implementing texture swizzles.
7728 * The begin_tex_swizzle(), get_tex_swizzle_dst() and end_tex_swizzle()
7729 * functions use this to encapsulate the extra steps needed to perform
7730 * a texture swizzle, or shadow/depth comparisons.
7731 * The shadow/depth comparison is only done here if for the cases where
7732 * there's no VGPU10 opcode (like texture bias lookup w/ shadow compare).
7734 struct tex_swizzle_info
7737 boolean shadow_compare
;
7739 enum tgsi_texture_type texture_target
; /**< TGSI_TEXTURE_x */
7740 struct tgsi_full_src_register tmp_src
;
7741 struct tgsi_full_dst_register tmp_dst
;
7742 const struct tgsi_full_dst_register
*inst_dst
;
7743 const struct tgsi_full_src_register
*coord_src
;
7748 * Do setup for handling texture swizzles or shadow compares.
7749 * \param unit the texture unit
7750 * \param inst the TGSI texture instruction
7751 * \param shadow_compare do shadow/depth comparison?
7752 * \param swz returns the swizzle info
7755 begin_tex_swizzle(struct svga_shader_emitter_v10
*emit
,
7757 const struct tgsi_full_instruction
*inst
,
7758 boolean shadow_compare
,
7759 struct tex_swizzle_info
*swz
)
7761 swz
->swizzled
= (emit
->key
.tex
[unit
].swizzle_r
!= TGSI_SWIZZLE_X
||
7762 emit
->key
.tex
[unit
].swizzle_g
!= TGSI_SWIZZLE_Y
||
7763 emit
->key
.tex
[unit
].swizzle_b
!= TGSI_SWIZZLE_Z
||
7764 emit
->key
.tex
[unit
].swizzle_a
!= TGSI_SWIZZLE_W
);
7766 swz
->shadow_compare
= shadow_compare
;
7767 swz
->texture_target
= inst
->Texture
.Texture
;
7769 if (swz
->swizzled
|| shadow_compare
) {
7770 /* Allocate temp register for the result of the SAMPLE instruction
7771 * and the source of the MOV/compare/swizzle instructions.
7773 unsigned tmp
= get_temp_index(emit
);
7774 swz
->tmp_src
= make_src_temp_reg(tmp
);
7775 swz
->tmp_dst
= make_dst_temp_reg(tmp
);
7779 swz
->inst_dst
= &inst
->Dst
[0];
7780 swz
->coord_src
= &inst
->Src
[0];
7782 emit
->fs
.shadow_compare_units
|= shadow_compare
<< unit
;
7787 * Returns the register to put the SAMPLE instruction results into.
7788 * This will either be the original instruction dst reg (if no swizzle
7789 * and no shadow comparison) or a temporary reg if there is a swizzle.
7791 static const struct tgsi_full_dst_register
*
7792 get_tex_swizzle_dst(const struct tex_swizzle_info
*swz
)
7794 return (swz
->swizzled
|| swz
->shadow_compare
)
7795 ? &swz
->tmp_dst
: swz
->inst_dst
;
7800 * This emits the MOV instruction that actually implements a texture swizzle
7801 * and/or shadow comparison.
7804 end_tex_swizzle(struct svga_shader_emitter_v10
*emit
,
7805 const struct tex_swizzle_info
*swz
)
7807 if (swz
->shadow_compare
) {
7808 /* Emit extra instructions to compare the fetched texel value against
7809 * a texture coordinate component. The result of the comparison
7812 struct tgsi_full_src_register coord_src
;
7813 struct tgsi_full_src_register texel_src
=
7814 scalar_src(&swz
->tmp_src
, TGSI_SWIZZLE_X
);
7815 struct tgsi_full_src_register one
=
7816 make_immediate_reg_float(emit
, 1.0f
);
7817 /* convert gallium comparison func to SVGA comparison func */
7818 SVGA3dCmpFunc compare_func
= emit
->key
.tex
[swz
->unit
].compare_func
+ 1;
7821 tgsi_util_get_shadow_ref_src_index(swz
->texture_target
) % 4;
7822 assert(component
>= 0);
7823 coord_src
= scalar_src(swz
->coord_src
, component
);
7825 /* COMPARE tmp, coord, texel */
7826 emit_comparison(emit
, compare_func
,
7827 &swz
->tmp_dst
, &coord_src
, &texel_src
);
7829 /* AND dest, tmp, {1.0} */
7830 begin_emit_instruction(emit
);
7831 emit_opcode(emit
, VGPU10_OPCODE_AND
, FALSE
);
7832 if (swz
->swizzled
) {
7833 emit_dst_register(emit
, &swz
->tmp_dst
);
7836 emit_dst_register(emit
, swz
->inst_dst
);
7838 emit_src_register(emit
, &swz
->tmp_src
);
7839 emit_src_register(emit
, &one
);
7840 end_emit_instruction(emit
);
7843 if (swz
->swizzled
) {
7844 unsigned swz_r
= emit
->key
.tex
[swz
->unit
].swizzle_r
;
7845 unsigned swz_g
= emit
->key
.tex
[swz
->unit
].swizzle_g
;
7846 unsigned swz_b
= emit
->key
.tex
[swz
->unit
].swizzle_b
;
7847 unsigned swz_a
= emit
->key
.tex
[swz
->unit
].swizzle_a
;
7848 unsigned writemask_0
= 0, writemask_1
= 0;
7849 boolean int_tex
= is_integer_type(emit
->sampler_return_type
[swz
->unit
]);
7851 /* Swizzle w/out zero/one terms */
7852 struct tgsi_full_src_register src_swizzled
=
7853 swizzle_src(&swz
->tmp_src
,
7854 swz_r
< PIPE_SWIZZLE_0
? swz_r
: PIPE_SWIZZLE_X
,
7855 swz_g
< PIPE_SWIZZLE_0
? swz_g
: PIPE_SWIZZLE_Y
,
7856 swz_b
< PIPE_SWIZZLE_0
? swz_b
: PIPE_SWIZZLE_Z
,
7857 swz_a
< PIPE_SWIZZLE_0
? swz_a
: PIPE_SWIZZLE_W
);
7859 /* MOV dst, color(tmp).<swizzle> */
7860 emit_instruction_op1(emit
, VGPU10_OPCODE_MOV
,
7861 swz
->inst_dst
, &src_swizzled
);
7863 /* handle swizzle zero terms */
7864 writemask_0
= (((swz_r
== PIPE_SWIZZLE_0
) << 0) |
7865 ((swz_g
== PIPE_SWIZZLE_0
) << 1) |
7866 ((swz_b
== PIPE_SWIZZLE_0
) << 2) |
7867 ((swz_a
== PIPE_SWIZZLE_0
) << 3));
7868 writemask_0
&= swz
->inst_dst
->Register
.WriteMask
;
7871 struct tgsi_full_src_register zero
= int_tex
?
7872 make_immediate_reg_int(emit
, 0) :
7873 make_immediate_reg_float(emit
, 0.0f
);
7874 struct tgsi_full_dst_register dst
=
7875 writemask_dst(swz
->inst_dst
, writemask_0
);
7877 /* MOV dst.writemask_0, {0,0,0,0} */
7878 emit_instruction_op1(emit
, VGPU10_OPCODE_MOV
, &dst
, &zero
);
7881 /* handle swizzle one terms */
7882 writemask_1
= (((swz_r
== PIPE_SWIZZLE_1
) << 0) |
7883 ((swz_g
== PIPE_SWIZZLE_1
) << 1) |
7884 ((swz_b
== PIPE_SWIZZLE_1
) << 2) |
7885 ((swz_a
== PIPE_SWIZZLE_1
) << 3));
7886 writemask_1
&= swz
->inst_dst
->Register
.WriteMask
;
7889 struct tgsi_full_src_register one
= int_tex
?
7890 make_immediate_reg_int(emit
, 1) :
7891 make_immediate_reg_float(emit
, 1.0f
);
7892 struct tgsi_full_dst_register dst
=
7893 writemask_dst(swz
->inst_dst
, writemask_1
);
7895 /* MOV dst.writemask_1, {1,1,1,1} */
7896 emit_instruction_op1(emit
, VGPU10_OPCODE_MOV
, &dst
, &one
);
7903 * Emit code for TGSI_OPCODE_SAMPLE instruction.
7906 emit_sample(struct svga_shader_emitter_v10
*emit
,
7907 const struct tgsi_full_instruction
*inst
)
7909 const unsigned resource_unit
= inst
->Src
[1].Register
.Index
;
7910 const unsigned sampler_unit
= inst
->Src
[2].Register
.Index
;
7911 struct tgsi_full_src_register coord
;
7913 struct tex_swizzle_info swz_info
;
7915 begin_tex_swizzle(emit
, sampler_unit
, inst
, FALSE
, &swz_info
);
7917 get_texel_offsets(emit
, inst
, offsets
);
7919 coord
= setup_texcoord(emit
, resource_unit
, &inst
->Src
[0]);
7921 /* SAMPLE dst, coord(s0), resource, sampler */
7922 begin_emit_instruction(emit
);
7924 /* NOTE: for non-fragment shaders, we should use VGPU10_OPCODE_SAMPLE_L
7925 * with LOD=0. But our virtual GPU accepts this as-is.
7927 emit_sample_opcode(emit
, VGPU10_OPCODE_SAMPLE
,
7928 inst
->Instruction
.Saturate
, offsets
);
7929 emit_dst_register(emit
, get_tex_swizzle_dst(&swz_info
));
7930 emit_src_register(emit
, &coord
);
7931 emit_resource_register(emit
, resource_unit
);
7932 emit_sampler_register(emit
, sampler_unit
);
7933 end_emit_instruction(emit
);
7935 end_tex_swizzle(emit
, &swz_info
);
7937 free_temp_indexes(emit
);
7944 * Check if a texture instruction is valid.
7945 * An example of an invalid texture instruction is doing shadow comparison
7946 * with an integer-valued texture.
7947 * If we detect an invalid texture instruction, we replace it with:
7948 * MOV dst, {1,1,1,1};
7949 * \return TRUE if valid, FALSE if invalid.
7952 is_valid_tex_instruction(struct svga_shader_emitter_v10
*emit
,
7953 const struct tgsi_full_instruction
*inst
)
7955 const unsigned unit
= inst
->Src
[1].Register
.Index
;
7956 const enum tgsi_texture_type target
= inst
->Texture
.Texture
;
7957 boolean valid
= TRUE
;
7959 if (tgsi_is_shadow_target(target
) &&
7960 is_integer_type(emit
->sampler_return_type
[unit
])) {
7961 debug_printf("Invalid SAMPLE_C with an integer texture!\n");
7964 /* XXX might check for other conditions in the future here */
7967 /* emit a MOV dst, {1,1,1,1} instruction. */
7968 struct tgsi_full_src_register one
= make_immediate_reg_float(emit
, 1.0f
);
7969 begin_emit_instruction(emit
);
7970 emit_opcode(emit
, VGPU10_OPCODE_MOV
, FALSE
);
7971 emit_dst_register(emit
, &inst
->Dst
[0]);
7972 emit_src_register(emit
, &one
);
7973 end_emit_instruction(emit
);
7981 * Emit code for TGSI_OPCODE_TEX (simple texture lookup)
7984 emit_tex(struct svga_shader_emitter_v10
*emit
,
7985 const struct tgsi_full_instruction
*inst
)
7987 const uint unit
= inst
->Src
[1].Register
.Index
;
7988 const enum tgsi_texture_type target
= inst
->Texture
.Texture
;
7989 VGPU10_OPCODE_TYPE opcode
;
7990 struct tgsi_full_src_register coord
;
7992 struct tex_swizzle_info swz_info
;
7994 /* check that the sampler returns a float */
7995 if (!is_valid_tex_instruction(emit
, inst
))
7998 begin_tex_swizzle(emit
, unit
, inst
, FALSE
, &swz_info
);
8000 get_texel_offsets(emit
, inst
, offsets
);
8002 coord
= setup_texcoord(emit
, unit
, &inst
->Src
[0]);
8004 /* SAMPLE dst, coord(s0), resource, sampler */
8005 begin_emit_instruction(emit
);
8007 if (tgsi_is_shadow_target(target
))
8008 opcode
= VGPU10_OPCODE_SAMPLE_C
;
8010 opcode
= VGPU10_OPCODE_SAMPLE
;
8012 emit_sample_opcode(emit
, opcode
, inst
->Instruction
.Saturate
, offsets
);
8013 emit_dst_register(emit
, get_tex_swizzle_dst(&swz_info
));
8014 emit_src_register(emit
, &coord
);
8015 emit_resource_register(emit
, unit
);
8016 emit_sampler_register(emit
, unit
);
8017 if (opcode
== VGPU10_OPCODE_SAMPLE_C
) {
8018 emit_tex_compare_refcoord(emit
, target
, &coord
);
8020 end_emit_instruction(emit
);
8022 end_tex_swizzle(emit
, &swz_info
);
8024 free_temp_indexes(emit
);
8030 * Emit code for TGSI_OPCODE_TG4 (texture lookup for texture gather)
8033 emit_tg4(struct svga_shader_emitter_v10
*emit
,
8034 const struct tgsi_full_instruction
*inst
)
8036 const uint unit
= inst
->Src
[2].Register
.Index
;
8037 struct tgsi_full_src_register src
;
8038 struct tgsi_full_src_register offset_src
, sampler
, ref
;
8041 /* check that the sampler returns a float */
8042 if (!is_valid_tex_instruction(emit
, inst
))
8045 if (emit
->version
>= 50) {
8046 unsigned target
= inst
->Texture
.Texture
;
8047 int index
= inst
->Src
[1].Register
.Index
;
8048 const union tgsi_immediate_data
*imm
= emit
->immediates
[index
];
8049 int select_comp
= imm
[inst
->Src
[1].Register
.SwizzleX
].Int
;
8050 unsigned select_swizzle
= PIPE_SWIZZLE_X
;
8052 if (!tgsi_is_shadow_target(target
)) {
8053 switch (select_comp
) {
8055 select_swizzle
= emit
->key
.tex
[unit
].swizzle_r
;
8058 select_swizzle
= emit
->key
.tex
[unit
].swizzle_g
;
8061 select_swizzle
= emit
->key
.tex
[unit
].swizzle_b
;
8064 select_swizzle
= emit
->key
.tex
[unit
].swizzle_a
;
8067 assert(!"Unexpected component in texture gather swizzle");
8071 select_swizzle
= emit
->key
.tex
[unit
].swizzle_r
;
8074 if (select_swizzle
== PIPE_SWIZZLE_1
) {
8075 src
= make_immediate_reg_float(emit
, 1.0);
8076 emit_instruction_op1(emit
, VGPU10_OPCODE_MOV
, &inst
->Dst
[0], &src
);
8079 else if (select_swizzle
== PIPE_SWIZZLE_0
) {
8080 src
= make_immediate_reg_float(emit
, 0.0);
8081 emit_instruction_op1(emit
, VGPU10_OPCODE_MOV
, &inst
->Dst
[0], &src
);
8085 src
= setup_texcoord(emit
, unit
, &inst
->Src
[0]);
8087 /* GATHER4 dst, coord, resource, sampler */
8088 /* GATHER4_C dst, coord, resource, sampler ref */
8089 /* GATHER4_PO dst, coord, offset resource, sampler */
8090 /* GATHER4_PO_C dst, coord, offset resource, sampler, ref */
8091 begin_emit_instruction(emit
);
8092 if (inst
->Texture
.NumOffsets
== 1) {
8093 if (tgsi_is_shadow_target(target
)) {
8094 emit_opcode(emit
, VGPU10_OPCODE_GATHER4_PO_C
,
8095 inst
->Instruction
.Saturate
);
8098 emit_opcode(emit
, VGPU10_OPCODE_GATHER4_PO
,
8099 inst
->Instruction
.Saturate
);
8103 if (tgsi_is_shadow_target(target
)) {
8104 emit_opcode(emit
, VGPU10_OPCODE_GATHER4_C
,
8105 inst
->Instruction
.Saturate
);
8108 emit_opcode(emit
, VGPU10_OPCODE_GATHER4
,
8109 inst
->Instruction
.Saturate
);
8113 emit_dst_register(emit
, &inst
->Dst
[0]);
8114 emit_src_register(emit
, &src
);
8115 if (inst
->Texture
.NumOffsets
== 1) {
8117 offset_src
= make_src_reg(inst
->TexOffsets
[0].File
,
8118 inst
->TexOffsets
[0].Index
);
8119 offset_src
= swizzle_src(&offset_src
, inst
->TexOffsets
[0].SwizzleX
,
8120 inst
->TexOffsets
[0].SwizzleY
,
8121 inst
->TexOffsets
[0].SwizzleZ
,
8123 emit_src_register(emit
, &offset_src
);
8127 emit_resource_register(emit
, unit
);
8130 sampler
= make_src_reg(TGSI_FILE_SAMPLER
, unit
);
8131 sampler
.Register
.SwizzleX
=
8132 sampler
.Register
.SwizzleY
=
8133 sampler
.Register
.SwizzleZ
=
8134 sampler
.Register
.SwizzleW
= select_swizzle
;
8135 emit_src_register(emit
, &sampler
);
8137 if (tgsi_is_shadow_target(target
)) {
8139 if (target
== TGSI_TEXTURE_SHADOWCUBE_ARRAY
) {
8140 ref
= scalar_src(&inst
->Src
[1], TGSI_SWIZZLE_X
);
8141 emit_tex_compare_refcoord(emit
, target
, &ref
);
8144 emit_tex_compare_refcoord(emit
, target
, &src
);
8148 end_emit_instruction(emit
);
8149 free_temp_indexes(emit
);
8152 /* Only a single channel is supported in SM4_1 and we report
8153 * PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS = 1.
8154 * Only the 0th component will be gathered.
8156 switch (emit
->key
.tex
[unit
].swizzle_r
) {
8157 case PIPE_SWIZZLE_X
:
8158 get_texel_offsets(emit
, inst
, offsets
);
8159 src
= setup_texcoord(emit
, unit
, &inst
->Src
[0]);
8161 /* Gather dst, coord, resource, sampler */
8162 begin_emit_instruction(emit
);
8163 emit_sample_opcode(emit
, VGPU10_OPCODE_GATHER4
,
8164 inst
->Instruction
.Saturate
, offsets
);
8165 emit_dst_register(emit
, &inst
->Dst
[0]);
8166 emit_src_register(emit
, &src
);
8167 emit_resource_register(emit
, unit
);
8170 sampler
= make_src_reg(TGSI_FILE_SAMPLER
, unit
);
8171 sampler
.Register
.SwizzleX
=
8172 sampler
.Register
.SwizzleY
=
8173 sampler
.Register
.SwizzleZ
=
8174 sampler
.Register
.SwizzleW
= PIPE_SWIZZLE_X
;
8175 emit_src_register(emit
, &sampler
);
8177 end_emit_instruction(emit
);
8179 case PIPE_SWIZZLE_W
:
8180 case PIPE_SWIZZLE_1
:
8181 src
= make_immediate_reg_float(emit
, 1.0);
8182 emit_instruction_op1(emit
, VGPU10_OPCODE_MOV
, &inst
->Dst
[0], &src
);
8184 case PIPE_SWIZZLE_Y
:
8185 case PIPE_SWIZZLE_Z
:
8186 case PIPE_SWIZZLE_0
:
8188 src
= make_immediate_reg_float(emit
, 0.0);
8189 emit_instruction_op1(emit
, VGPU10_OPCODE_MOV
, &inst
->Dst
[0], &src
);
8200 * Emit code for TGSI_OPCODE_TEX2 (texture lookup for shadow cube map arrays)
8203 emit_tex2(struct svga_shader_emitter_v10
*emit
,
8204 const struct tgsi_full_instruction
*inst
)
8206 const uint unit
= inst
->Src
[2].Register
.Index
;
8207 unsigned target
= inst
->Texture
.Texture
;
8208 struct tgsi_full_src_register coord
, ref
;
8210 struct tex_swizzle_info swz_info
;
8212 /* check that the sampler returns a float */
8213 if (!is_valid_tex_instruction(emit
, inst
))
8216 begin_tex_swizzle(emit
, unit
, inst
, FALSE
, &swz_info
);
8218 get_texel_offsets(emit
, inst
, offsets
);
8220 coord
= setup_texcoord(emit
, unit
, &inst
->Src
[0]);
8221 ref
= scalar_src(&inst
->Src
[1], TGSI_SWIZZLE_X
);
8223 /* SAMPLE_C dst, coord, resource, sampler, ref */
8224 begin_emit_instruction(emit
);
8225 emit_sample_opcode(emit
, VGPU10_OPCODE_SAMPLE_C
,
8226 inst
->Instruction
.Saturate
, offsets
);
8227 emit_dst_register(emit
, get_tex_swizzle_dst(&swz_info
));
8228 emit_src_register(emit
, &coord
);
8229 emit_resource_register(emit
, unit
);
8230 emit_sampler_register(emit
, unit
);
8231 emit_tex_compare_refcoord(emit
, target
, &ref
);
8232 end_emit_instruction(emit
);
8234 end_tex_swizzle(emit
, &swz_info
);
8236 free_temp_indexes(emit
);
8243 * Emit code for TGSI_OPCODE_TXP (projective texture)
8246 emit_txp(struct svga_shader_emitter_v10
*emit
,
8247 const struct tgsi_full_instruction
*inst
)
8249 const uint unit
= inst
->Src
[1].Register
.Index
;
8250 const enum tgsi_texture_type target
= inst
->Texture
.Texture
;
8251 VGPU10_OPCODE_TYPE opcode
;
8253 unsigned tmp
= get_temp_index(emit
);
8254 struct tgsi_full_src_register tmp_src
= make_src_temp_reg(tmp
);
8255 struct tgsi_full_dst_register tmp_dst
= make_dst_temp_reg(tmp
);
8256 struct tgsi_full_src_register src0_wwww
=
8257 scalar_src(&inst
->Src
[0], TGSI_SWIZZLE_W
);
8258 struct tgsi_full_src_register coord
;
8259 struct tex_swizzle_info swz_info
;
8261 /* check that the sampler returns a float */
8262 if (!is_valid_tex_instruction(emit
, inst
))
8265 begin_tex_swizzle(emit
, unit
, inst
, FALSE
, &swz_info
);
8267 get_texel_offsets(emit
, inst
, offsets
);
8269 coord
= setup_texcoord(emit
, unit
, &inst
->Src
[0]);
8271 /* DIV tmp, coord, coord.wwww */
8272 emit_instruction_op2(emit
, VGPU10_OPCODE_DIV
, &tmp_dst
,
8273 &coord
, &src0_wwww
);
8275 /* SAMPLE dst, coord(tmp), resource, sampler */
8276 begin_emit_instruction(emit
);
8278 if (tgsi_is_shadow_target(target
))
8279 /* NOTE: for non-fragment shaders, we should use
8280 * VGPU10_OPCODE_SAMPLE_C_LZ, but our virtual GPU accepts this as-is.
8282 opcode
= VGPU10_OPCODE_SAMPLE_C
;
8284 opcode
= VGPU10_OPCODE_SAMPLE
;
8286 emit_sample_opcode(emit
, opcode
, inst
->Instruction
.Saturate
, offsets
);
8287 emit_dst_register(emit
, get_tex_swizzle_dst(&swz_info
));
8288 emit_src_register(emit
, &tmp_src
); /* projected coord */
8289 emit_resource_register(emit
, unit
);
8290 emit_sampler_register(emit
, unit
);
8291 if (opcode
== VGPU10_OPCODE_SAMPLE_C
) {
8292 emit_tex_compare_refcoord(emit
, target
, &tmp_src
);
8294 end_emit_instruction(emit
);
8296 end_tex_swizzle(emit
, &swz_info
);
8298 free_temp_indexes(emit
);
8305 * Emit code for TGSI_OPCODE_TXD (explicit derivatives)
8308 emit_txd(struct svga_shader_emitter_v10
*emit
,
8309 const struct tgsi_full_instruction
*inst
)
8311 const uint unit
= inst
->Src
[3].Register
.Index
;
8312 const enum tgsi_texture_type target
= inst
->Texture
.Texture
;
8314 struct tgsi_full_src_register coord
;
8315 struct tex_swizzle_info swz_info
;
8317 begin_tex_swizzle(emit
, unit
, inst
, tgsi_is_shadow_target(target
),
8320 get_texel_offsets(emit
, inst
, offsets
);
8322 coord
= setup_texcoord(emit
, unit
, &inst
->Src
[0]);
8324 /* SAMPLE_D dst, coord(s0), resource, sampler, Xderiv(s1), Yderiv(s2) */
8325 begin_emit_instruction(emit
);
8326 emit_sample_opcode(emit
, VGPU10_OPCODE_SAMPLE_D
,
8327 inst
->Instruction
.Saturate
, offsets
);
8328 emit_dst_register(emit
, get_tex_swizzle_dst(&swz_info
));
8329 emit_src_register(emit
, &coord
);
8330 emit_resource_register(emit
, unit
);
8331 emit_sampler_register(emit
, unit
);
8332 emit_src_register(emit
, &inst
->Src
[1]); /* Xderiv */
8333 emit_src_register(emit
, &inst
->Src
[2]); /* Yderiv */
8334 end_emit_instruction(emit
);
8336 end_tex_swizzle(emit
, &swz_info
);
8338 free_temp_indexes(emit
);
8345 * Emit code for TGSI_OPCODE_TXF (texel fetch)
8348 emit_txf(struct svga_shader_emitter_v10
*emit
,
8349 const struct tgsi_full_instruction
*inst
)
8351 const uint unit
= inst
->Src
[1].Register
.Index
;
8352 const boolean msaa
= tgsi_is_msaa_target(inst
->Texture
.Texture
)
8353 && emit
->key
.tex
[unit
].num_samples
> 1;
8355 struct tex_swizzle_info swz_info
;
8357 begin_tex_swizzle(emit
, unit
, inst
, FALSE
, &swz_info
);
8359 get_texel_offsets(emit
, inst
, offsets
);
8362 assert(emit
->key
.tex
[unit
].num_samples
> 1);
8364 /* Fetch one sample from an MSAA texture */
8365 struct tgsi_full_src_register sampleIndex
=
8366 scalar_src(&inst
->Src
[0], TGSI_SWIZZLE_W
);
8367 /* LD_MS dst, coord(s0), resource, sampleIndex */
8368 begin_emit_instruction(emit
);
8369 emit_sample_opcode(emit
, VGPU10_OPCODE_LD_MS
,
8370 inst
->Instruction
.Saturate
, offsets
);
8371 emit_dst_register(emit
, get_tex_swizzle_dst(&swz_info
));
8372 emit_src_register(emit
, &inst
->Src
[0]);
8373 emit_resource_register(emit
, unit
);
8374 emit_src_register(emit
, &sampleIndex
);
8375 end_emit_instruction(emit
);
8378 /* Fetch one texel specified by integer coordinate */
8379 /* LD dst, coord(s0), resource */
8380 begin_emit_instruction(emit
);
8381 emit_sample_opcode(emit
, VGPU10_OPCODE_LD
,
8382 inst
->Instruction
.Saturate
, offsets
);
8383 emit_dst_register(emit
, get_tex_swizzle_dst(&swz_info
));
8384 emit_src_register(emit
, &inst
->Src
[0]);
8385 emit_resource_register(emit
, unit
);
8386 end_emit_instruction(emit
);
8389 end_tex_swizzle(emit
, &swz_info
);
8391 free_temp_indexes(emit
);
8398 * Emit code for TGSI_OPCODE_TXL (explicit LOD) or TGSI_OPCODE_TXB (LOD bias)
8399 * or TGSI_OPCODE_TXB2 (for cube shadow maps).
8402 emit_txl_txb(struct svga_shader_emitter_v10
*emit
,
8403 const struct tgsi_full_instruction
*inst
)
8405 const enum tgsi_texture_type target
= inst
->Texture
.Texture
;
8406 VGPU10_OPCODE_TYPE opcode
;
8409 struct tgsi_full_src_register coord
, lod_bias
;
8410 struct tex_swizzle_info swz_info
;
8412 assert(inst
->Instruction
.Opcode
== TGSI_OPCODE_TXL
||
8413 inst
->Instruction
.Opcode
== TGSI_OPCODE_TXB
||
8414 inst
->Instruction
.Opcode
== TGSI_OPCODE_TXB2
);
8416 if (inst
->Instruction
.Opcode
== TGSI_OPCODE_TXB2
) {
8417 lod_bias
= scalar_src(&inst
->Src
[1], TGSI_SWIZZLE_X
);
8418 unit
= inst
->Src
[2].Register
.Index
;
8421 lod_bias
= scalar_src(&inst
->Src
[0], TGSI_SWIZZLE_W
);
8422 unit
= inst
->Src
[1].Register
.Index
;
8425 begin_tex_swizzle(emit
, unit
, inst
, tgsi_is_shadow_target(target
),
8428 get_texel_offsets(emit
, inst
, offsets
);
8430 coord
= setup_texcoord(emit
, unit
, &inst
->Src
[0]);
8432 /* SAMPLE_L/B dst, coord(s0), resource, sampler, lod(s3) */
8433 begin_emit_instruction(emit
);
8434 if (inst
->Instruction
.Opcode
== TGSI_OPCODE_TXL
) {
8435 opcode
= VGPU10_OPCODE_SAMPLE_L
;
8438 opcode
= VGPU10_OPCODE_SAMPLE_B
;
8440 emit_sample_opcode(emit
, opcode
, inst
->Instruction
.Saturate
, offsets
);
8441 emit_dst_register(emit
, get_tex_swizzle_dst(&swz_info
));
8442 emit_src_register(emit
, &coord
);
8443 emit_resource_register(emit
, unit
);
8444 emit_sampler_register(emit
, unit
);
8445 emit_src_register(emit
, &lod_bias
);
8446 end_emit_instruction(emit
);
8448 end_tex_swizzle(emit
, &swz_info
);
8450 free_temp_indexes(emit
);
8457 * Emit code for TGSI_OPCODE_TXL2 (explicit LOD) for cubemap array.
8460 emit_txl2(struct svga_shader_emitter_v10
*emit
,
8461 const struct tgsi_full_instruction
*inst
)
8463 unsigned target
= inst
->Texture
.Texture
;
8464 unsigned opcode
, unit
;
8466 struct tgsi_full_src_register coord
, lod
;
8467 struct tex_swizzle_info swz_info
;
8469 assert(inst
->Instruction
.Opcode
== TGSI_OPCODE_TXL2
);
8471 lod
= scalar_src(&inst
->Src
[1], TGSI_SWIZZLE_X
);
8472 unit
= inst
->Src
[2].Register
.Index
;
8474 begin_tex_swizzle(emit
, unit
, inst
, tgsi_is_shadow_target(target
),
8477 get_texel_offsets(emit
, inst
, offsets
);
8479 coord
= setup_texcoord(emit
, unit
, &inst
->Src
[0]);
8481 /* SAMPLE_L dst, coord(s0), resource, sampler, lod(s3) */
8482 begin_emit_instruction(emit
);
8483 opcode
= VGPU10_OPCODE_SAMPLE_L
;
8484 emit_sample_opcode(emit
, opcode
, inst
->Instruction
.Saturate
, offsets
);
8485 emit_dst_register(emit
, get_tex_swizzle_dst(&swz_info
));
8486 emit_src_register(emit
, &coord
);
8487 emit_resource_register(emit
, unit
);
8488 emit_sampler_register(emit
, unit
);
8489 emit_src_register(emit
, &lod
);
8490 end_emit_instruction(emit
);
8492 end_tex_swizzle(emit
, &swz_info
);
8494 free_temp_indexes(emit
);
8501 * Emit code for TGSI_OPCODE_TXQ (texture query) instruction.
8504 emit_txq(struct svga_shader_emitter_v10
*emit
,
8505 const struct tgsi_full_instruction
*inst
)
8507 const uint unit
= inst
->Src
[1].Register
.Index
;
8509 if (emit
->sampler_target
[unit
] == TGSI_TEXTURE_BUFFER
) {
8510 /* RESINFO does not support querying texture buffers, so we instead
8511 * store texture buffer sizes in shader constants, then copy them to
8512 * implement TXQ instead of emitting RESINFO.
8513 * MOV dst, const[texture_buffer_size_index[unit]]
8515 struct tgsi_full_src_register size_src
=
8516 make_src_const_reg(emit
->texture_buffer_size_index
[unit
]);
8517 emit_instruction_op1(emit
, VGPU10_OPCODE_MOV
, &inst
->Dst
[0], &size_src
);
8519 /* RESINFO dst, srcMipLevel, resource */
8520 begin_emit_instruction(emit
);
8521 emit_opcode_resinfo(emit
, VGPU10_RESINFO_RETURN_UINT
);
8522 emit_dst_register(emit
, &inst
->Dst
[0]);
8523 emit_src_register(emit
, &inst
->Src
[0]);
8524 emit_resource_register(emit
, unit
);
8525 end_emit_instruction(emit
);
8528 free_temp_indexes(emit
);
8535 * Does this opcode produce a double-precision result?
8536 * XXX perhaps move this to a TGSI utility.
8539 opcode_has_dbl_dst(unsigned opcode
)
8542 case TGSI_OPCODE_F2D
:
8543 case TGSI_OPCODE_DABS
:
8544 case TGSI_OPCODE_DADD
:
8545 case TGSI_OPCODE_DFRAC
:
8546 case TGSI_OPCODE_DMAX
:
8547 case TGSI_OPCODE_DMIN
:
8548 case TGSI_OPCODE_DMUL
:
8549 case TGSI_OPCODE_DNEG
:
8550 case TGSI_OPCODE_I2D
:
8551 case TGSI_OPCODE_U2D
:
8561 * Does this opcode use double-precision source registers?
8564 opcode_has_dbl_src(unsigned opcode
)
8567 case TGSI_OPCODE_D2F
:
8568 case TGSI_OPCODE_DABS
:
8569 case TGSI_OPCODE_DADD
:
8570 case TGSI_OPCODE_DFRAC
:
8571 case TGSI_OPCODE_DMAX
:
8572 case TGSI_OPCODE_DMIN
:
8573 case TGSI_OPCODE_DMUL
:
8574 case TGSI_OPCODE_DNEG
:
8575 case TGSI_OPCODE_D2I
:
8576 case TGSI_OPCODE_D2U
:
8586 * Check that the swizzle for reading from a double-precision register
8590 check_double_src_swizzle(const struct tgsi_full_src_register
*reg
)
8592 assert((reg
->Register
.SwizzleX
== PIPE_SWIZZLE_X
&&
8593 reg
->Register
.SwizzleY
== PIPE_SWIZZLE_Y
) ||
8594 (reg
->Register
.SwizzleX
== PIPE_SWIZZLE_Z
&&
8595 reg
->Register
.SwizzleY
== PIPE_SWIZZLE_W
));
8597 assert((reg
->Register
.SwizzleZ
== PIPE_SWIZZLE_X
&&
8598 reg
->Register
.SwizzleW
== PIPE_SWIZZLE_Y
) ||
8599 (reg
->Register
.SwizzleZ
== PIPE_SWIZZLE_Z
&&
8600 reg
->Register
.SwizzleW
== PIPE_SWIZZLE_W
));
8605 * Check that the writemask for a double-precision instruction is valid.
8608 check_double_dst_writemask(const struct tgsi_full_instruction
*inst
)
8610 ASSERTED
unsigned writemask
= inst
->Dst
[0].Register
.WriteMask
;
8612 switch (inst
->Instruction
.Opcode
) {
8613 case TGSI_OPCODE_DABS
:
8614 case TGSI_OPCODE_DADD
:
8615 case TGSI_OPCODE_DFRAC
:
8616 case TGSI_OPCODE_DNEG
:
8617 case TGSI_OPCODE_DMAD
:
8618 case TGSI_OPCODE_DMAX
:
8619 case TGSI_OPCODE_DMIN
:
8620 case TGSI_OPCODE_DMUL
:
8621 case TGSI_OPCODE_DRCP
:
8622 case TGSI_OPCODE_DSQRT
:
8623 case TGSI_OPCODE_F2D
:
8624 assert(writemask
== TGSI_WRITEMASK_XYZW
||
8625 writemask
== TGSI_WRITEMASK_XY
||
8626 writemask
== TGSI_WRITEMASK_ZW
);
8628 case TGSI_OPCODE_DSEQ
:
8629 case TGSI_OPCODE_DSGE
:
8630 case TGSI_OPCODE_DSNE
:
8631 case TGSI_OPCODE_DSLT
:
8632 case TGSI_OPCODE_D2I
:
8633 case TGSI_OPCODE_D2U
:
8634 /* Write to 1 or 2 components only */
8635 assert(util_bitcount(writemask
) <= 2);
8638 /* XXX this list may be incomplete */
8645 * Double-precision absolute value.
8648 emit_dabs(struct svga_shader_emitter_v10
*emit
,
8649 const struct tgsi_full_instruction
*inst
)
8651 assert(emit
->version
>= 50);
8652 check_double_src_swizzle(&inst
->Src
[0]);
8653 check_double_dst_writemask(inst
);
8655 struct tgsi_full_src_register abs_src
= absolute_src(&inst
->Src
[0]);
8657 /* DMOV dst, |src| */
8658 emit_instruction_op1(emit
, VGPU10_OPCODE_DMOV
, &inst
->Dst
[0], &abs_src
);
8665 * Double-precision negation
8668 emit_dneg(struct svga_shader_emitter_v10
*emit
,
8669 const struct tgsi_full_instruction
*inst
)
8671 assert(emit
->version
>= 50);
8672 check_double_src_swizzle(&inst
->Src
[0]);
8673 check_double_dst_writemask(inst
);
8675 struct tgsi_full_src_register neg_src
= negate_src(&inst
->Src
[0]);
8677 /* DMOV dst, -src */
8678 emit_instruction_op1(emit
, VGPU10_OPCODE_DMOV
, &inst
->Dst
[0], &neg_src
);
8685 * SM5 has no DMAD opcode. Implement negation with DMUL/DADD.
8688 emit_dmad(struct svga_shader_emitter_v10
*emit
,
8689 const struct tgsi_full_instruction
*inst
)
8691 assert(emit
->version
>= 50);
8692 check_double_src_swizzle(&inst
->Src
[0]);
8693 check_double_src_swizzle(&inst
->Src
[1]);
8694 check_double_src_swizzle(&inst
->Src
[2]);
8695 check_double_dst_writemask(inst
);
8697 unsigned tmp
= get_temp_index(emit
);
8698 struct tgsi_full_src_register tmp_src
= make_src_temp_reg(tmp
);
8699 struct tgsi_full_dst_register tmp_dst
= make_dst_temp_reg(tmp
);
8701 /* DMUL tmp, src[0], src[1] */
8702 emit_instruction_opn(emit
, VGPU10_OPCODE_DMUL
,
8703 &tmp_dst
, &inst
->Src
[0], &inst
->Src
[1], NULL
,
8704 FALSE
, inst
->Instruction
.Precise
);
8706 /* DADD dst, tmp, src[2] */
8707 emit_instruction_opn(emit
, VGPU10_OPCODE_DADD
,
8708 &inst
->Dst
[0], &tmp_src
, &inst
->Src
[2], NULL
,
8709 inst
->Instruction
.Saturate
, inst
->Instruction
.Precise
);
8710 free_temp_indexes(emit
);
8717 * Double precision reciprocal square root
8720 emit_drsq(struct svga_shader_emitter_v10
*emit
,
8721 const struct tgsi_full_dst_register
*dst
,
8722 const struct tgsi_full_src_register
*src
)
8724 assert(emit
->version
>= 50);
8726 VGPU10OpcodeToken0 token0
;
8727 begin_emit_instruction(emit
);
8730 token0
.opcodeType
= VGPU10_OPCODE_VMWARE
;
8731 token0
.vmwareOpcodeType
= VGPU10_VMWARE_OPCODE_DRSQ
;
8732 emit_dword(emit
, token0
.value
);
8734 emit_dst_register(emit
, dst
);
8736 check_double_src_swizzle(src
);
8737 emit_src_register(emit
, src
);
8739 end_emit_instruction(emit
);
8746 * There is no SM5 opcode for double precision square root.
8747 * It will be implemented with DRSQ.
8748 * dst = src * DRSQ(src)
8751 emit_dsqrt(struct svga_shader_emitter_v10
*emit
,
8752 const struct tgsi_full_instruction
*inst
)
8754 assert(emit
->version
>= 50);
8756 check_double_src_swizzle(&inst
->Src
[0]);
8758 /* temporary register to hold the source */
8759 unsigned tmp
= get_temp_index(emit
);
8760 struct tgsi_full_dst_register tmp_dst
= make_dst_temp_reg(tmp
);
8761 struct tgsi_full_src_register tmp_src
= make_src_temp_reg(tmp
);
8763 /* temporary register to hold the DEQ result */
8764 unsigned tmp_cond
= get_temp_index(emit
);
8765 struct tgsi_full_dst_register tmp_cond_dst
= make_dst_temp_reg(tmp_cond
);
8766 struct tgsi_full_dst_register tmp_cond_dst_xy
=
8767 writemask_dst(&tmp_cond_dst
, TGSI_WRITEMASK_X
| TGSI_WRITEMASK_Y
);
8768 struct tgsi_full_src_register tmp_cond_src
= make_src_temp_reg(tmp_cond
);
8769 struct tgsi_full_src_register tmp_cond_src_xy
=
8770 swizzle_src(&tmp_cond_src
,
8771 PIPE_SWIZZLE_X
, PIPE_SWIZZLE_Y
,
8772 PIPE_SWIZZLE_X
, PIPE_SWIZZLE_Y
);
8774 /* The reciprocal square root of zero yields INF.
8775 * So if the source is 0, we replace it with 1 in the tmp register.
8776 * The later multiplication of zero in the original source will yield 0
8780 /* tmp1 = (src == 0) ? 1 : src;
8782 * MOVC tmp, tmp1, 1.0, src
8784 struct tgsi_full_src_register zero
=
8785 make_immediate_reg_double(emit
, 0);
8787 struct tgsi_full_src_register one
=
8788 make_immediate_reg_double(emit
, 1.0);
8790 emit_instruction_op2(emit
, VGPU10_OPCODE_DEQ
, &tmp_cond_dst_xy
,
8791 &zero
, &inst
->Src
[0]);
8792 emit_instruction_op3(emit
, VGPU10_OPCODE_DMOVC
, &tmp_dst
,
8793 &tmp_cond_src_xy
, &one
, &inst
->Src
[0]);
8795 struct tgsi_full_dst_register tmp_rsq_dst
= make_dst_temp_reg(tmp
);
8796 struct tgsi_full_src_register tmp_rsq_src
= make_src_temp_reg(tmp
);
8798 /* DRSQ tmp_rsq, tmp */
8799 emit_drsq(emit
, &tmp_rsq_dst
, &tmp_src
);
8801 /* DMUL dst, tmp_rsq, src[0] */
8802 emit_instruction_op2(emit
, VGPU10_OPCODE_DMUL
, &inst
->Dst
[0],
8803 &tmp_rsq_src
, &inst
->Src
[0]);
8805 free_temp_indexes(emit
);
8812 emit_interp_offset(struct svga_shader_emitter_v10
*emit
,
8813 const struct tgsi_full_instruction
*inst
)
8815 assert(emit
->version
>= 50);
8817 /* The src1.xy offset is a float with values in the range [-0.5, 0.5]
8818 * where (0,0) is the center of the pixel. We need to translate that
8819 * into an integer offset on a 16x16 grid in the range [-8/16, 7/16].
8820 * Also need to flip the Y axis (I think).
8822 unsigned tmp
= get_temp_index(emit
);
8823 struct tgsi_full_src_register tmp_src
= make_src_temp_reg(tmp
);
8824 struct tgsi_full_dst_register tmp_dst
= make_dst_temp_reg(tmp
);
8825 struct tgsi_full_dst_register tmp_dst_xy
=
8826 writemask_dst(&tmp_dst
, TGSI_WRITEMASK_X
| TGSI_WRITEMASK_Y
);
8827 struct tgsi_full_src_register const16
=
8828 make_immediate_reg_float4(emit
, 16.0f
, -16.0, 0, 0);
8830 /* MUL tmp.xy, src1, {16, -16, 0, 0} */
8831 emit_instruction_op2(emit
, VGPU10_OPCODE_MUL
,
8832 &tmp_dst_xy
, &inst
->Src
[1], &const16
);
8834 /* FTOI tmp.xy, tmp */
8835 emit_instruction_op1(emit
, VGPU10_OPCODE_FTOI
, &tmp_dst_xy
, &tmp_src
);
8837 /* EVAL_SNAPPED dst, src0, tmp */
8838 emit_instruction_op2(emit
, VGPU10_OPCODE_EVAL_SNAPPED
,
8839 &inst
->Dst
[0], &inst
->Src
[0], &tmp_src
);
8841 free_temp_indexes(emit
);
8848 * Emit a simple instruction (like ADD, MUL, MIN, etc).
8851 emit_simple(struct svga_shader_emitter_v10
*emit
,
8852 const struct tgsi_full_instruction
*inst
)
8854 const enum tgsi_opcode opcode
= inst
->Instruction
.Opcode
;
8855 const struct tgsi_opcode_info
*op
= tgsi_get_opcode_info(opcode
);
8856 const bool dbl_dst
= opcode_has_dbl_dst(inst
->Instruction
.Opcode
);
8857 const bool dbl_src
= opcode_has_dbl_src(inst
->Instruction
.Opcode
);
8860 if (inst
->Instruction
.Opcode
== TGSI_OPCODE_BGNLOOP
) {
8861 emit
->current_loop_depth
++;
8863 else if (inst
->Instruction
.Opcode
== TGSI_OPCODE_ENDLOOP
) {
8864 emit
->current_loop_depth
--;
8867 begin_emit_instruction(emit
);
8868 emit_opcode_precise(emit
, translate_opcode(inst
->Instruction
.Opcode
),
8869 inst
->Instruction
.Saturate
,
8870 inst
->Instruction
.Precise
);
8871 for (i
= 0; i
< op
->num_dst
; i
++) {
8873 check_double_dst_writemask(inst
);
8875 emit_dst_register(emit
, &inst
->Dst
[i
]);
8877 for (i
= 0; i
< op
->num_src
; i
++) {
8879 check_double_src_swizzle(&inst
->Src
[i
]);
8881 emit_src_register(emit
, &inst
->Src
[i
]);
8883 end_emit_instruction(emit
);
8890 * Emit MSB instruction (like IMSB, UMSB).
8892 * GLSL returns the index starting from the LSB;
8893 * whereas in SM5, firstbit_hi/shi returns the index starting from the MSB.
8894 * To get correct location as per glsl from SM5 device, we should
8895 * return (31 - index) if returned index is not -1.
8898 emit_msb(struct svga_shader_emitter_v10
*emit
,
8899 const struct tgsi_full_instruction
*inst
)
8901 const struct tgsi_full_dst_register
*index_dst
= &inst
->Dst
[0];
8903 assert(index_dst
->Register
.File
!= TGSI_FILE_OUTPUT
);
8905 struct tgsi_full_src_register index_src
=
8906 make_src_reg(index_dst
->Register
.File
, index_dst
->Register
.Index
);
8907 struct tgsi_full_src_register imm31
=
8908 make_immediate_reg_int(emit
, 31);
8909 imm31
= scalar_src(&imm31
, TGSI_SWIZZLE_X
);
8910 struct tgsi_full_src_register neg_one
=
8911 make_immediate_reg_int(emit
, -1);
8912 neg_one
= scalar_src(&neg_one
, TGSI_SWIZZLE_X
);
8913 unsigned tmp
= get_temp_index(emit
);
8914 const struct tgsi_full_dst_register tmp_dst
=
8915 make_dst_temp_reg(tmp
);
8916 const struct tgsi_full_dst_register tmp_dst_x
=
8917 writemask_dst(&tmp_dst
, TGSI_WRITEMASK_X
);
8918 const struct tgsi_full_src_register tmp_src_x
=
8919 make_src_scalar_reg(TGSI_FILE_TEMPORARY
, tmp
, TGSI_SWIZZLE_X
);
8920 int writemask
= TGSI_WRITEMASK_X
;
8921 int src_swizzle
= TGSI_SWIZZLE_X
;
8922 int dst_writemask
= index_dst
->Register
.WriteMask
;
8924 emit_simple(emit
, inst
);
8926 /* index conversion from SM5 to GLSL */
8927 while (writemask
& dst_writemask
) {
8928 struct tgsi_full_src_register index_src_comp
=
8929 scalar_src(&index_src
, src_swizzle
);
8930 struct tgsi_full_dst_register index_dst_comp
=
8931 writemask_dst(index_dst
, writemask
);
8933 /* check if index_src_comp != -1 */
8934 emit_instruction_op2(emit
, VGPU10_OPCODE_INE
,
8935 &tmp_dst_x
, &index_src_comp
, &neg_one
);
8938 emit_if(emit
, &tmp_src_x
);
8940 index_src_comp
= negate_src(&index_src_comp
);
8941 /* SUB DST, IMM{31}, DST */
8942 emit_instruction_op2(emit
, VGPU10_OPCODE_IADD
,
8943 &index_dst_comp
, &imm31
, &index_src_comp
);
8946 emit_instruction_op0(emit
, VGPU10_OPCODE_ENDIF
);
8948 writemask
= writemask
<< 1;
8949 src_swizzle
= src_swizzle
+ 1;
8951 free_temp_indexes(emit
);
8957 * Emit a BFE instruction (like UBFE, IBFE).
8958 * tgsi representation:
8959 * U/IBFE dst, value, offset, width
8960 * SM5 representation:
8961 * U/IBFE dst, width, offset, value
8962 * Note: SM5 has width & offset range (0-31);
8963 * whereas GLSL has width & offset range (0-32)
8966 emit_bfe(struct svga_shader_emitter_v10
*emit
,
8967 const struct tgsi_full_instruction
*inst
)
8969 const enum tgsi_opcode opcode
= inst
->Instruction
.Opcode
;
8970 struct tgsi_full_src_register imm32
= make_immediate_reg_int(emit
, 32);
8971 imm32
= scalar_src(&imm32
, TGSI_SWIZZLE_X
);
8972 struct tgsi_full_src_register zero
= make_immediate_reg_int(emit
, 0);
8973 zero
= scalar_src(&zero
, TGSI_SWIZZLE_X
);
8975 unsigned tmp1
= get_temp_index(emit
);
8976 const struct tgsi_full_dst_register cond1_dst
= make_dst_temp_reg(tmp1
);
8977 const struct tgsi_full_dst_register cond1_dst_x
=
8978 writemask_dst(&cond1_dst
, TGSI_WRITEMASK_X
);
8979 const struct tgsi_full_src_register cond1_src_x
=
8980 make_src_scalar_reg(TGSI_FILE_TEMPORARY
, tmp1
, TGSI_SWIZZLE_X
);
8982 unsigned tmp2
= get_temp_index(emit
);
8983 const struct tgsi_full_dst_register cond2_dst
= make_dst_temp_reg(tmp2
);
8984 const struct tgsi_full_dst_register cond2_dst_x
=
8985 writemask_dst(&cond2_dst
, TGSI_WRITEMASK_X
);
8986 const struct tgsi_full_src_register cond2_src_x
=
8987 make_src_scalar_reg(TGSI_FILE_TEMPORARY
, tmp2
, TGSI_SWIZZLE_X
);
8990 * In SM5, when width = 32 and offset = 0, it returns 0.
8991 * On the other hand GLSL, expects value to be copied as it is, to dst.
8994 /* cond1 = width ! = 32 */
8995 emit_instruction_op2(emit
, VGPU10_OPCODE_IEQ
,
8996 &cond1_dst_x
, &inst
->Src
[2], &imm32
);
8998 /* cond2 = offset ! = 0 */
8999 emit_instruction_op2(emit
, VGPU10_OPCODE_IEQ
,
9000 &cond2_dst_x
, &inst
->Src
[1], &zero
);
9002 /* cond 2 = cond1 & cond 2 */
9003 emit_instruction_op2(emit
, VGPU10_OPCODE_AND
, &cond2_dst_x
,
9007 emit_if(emit
, &cond2_src_x
);
9009 emit_instruction_op1(emit
, VGPU10_OPCODE_MOV
, &inst
->Dst
[0],
9013 emit_instruction_op0(emit
, VGPU10_OPCODE_ELSE
);
9015 /* U/IBFE dst, width, offset, value */
9016 emit_instruction_op3(emit
, translate_opcode(opcode
), &inst
->Dst
[0],
9017 &inst
->Src
[2], &inst
->Src
[1], &inst
->Src
[0]);
9020 emit_instruction_op0(emit
, VGPU10_OPCODE_ENDIF
);
9022 free_temp_indexes(emit
);
9028 * Emit BFI instruction
9029 * tgsi representation:
9030 * BFI dst, base, insert, offset, width
9031 * SM5 representation:
9032 * BFI dst, width, offset, insert, base
9033 * Note: SM5 has width & offset range (0-31);
9034 * whereas GLSL has width & offset range (0-32)
9037 emit_bfi(struct svga_shader_emitter_v10
*emit
,
9038 const struct tgsi_full_instruction
*inst
)
9040 const enum tgsi_opcode opcode
= inst
->Instruction
.Opcode
;
9041 struct tgsi_full_src_register imm32
= make_immediate_reg_int(emit
, 32);
9042 imm32
= scalar_src(&imm32
, TGSI_SWIZZLE_X
);
9044 struct tgsi_full_src_register zero
= make_immediate_reg_int(emit
, 0);
9045 zero
= scalar_src(&zero
, TGSI_SWIZZLE_X
);
9047 unsigned tmp1
= get_temp_index(emit
);
9048 const struct tgsi_full_dst_register cond1_dst
= make_dst_temp_reg(tmp1
);
9049 const struct tgsi_full_dst_register cond1_dst_x
=
9050 writemask_dst(&cond1_dst
, TGSI_WRITEMASK_X
);
9051 const struct tgsi_full_src_register cond1_src_x
=
9052 make_src_scalar_reg(TGSI_FILE_TEMPORARY
, tmp1
, TGSI_SWIZZLE_X
);
9054 unsigned tmp2
= get_temp_index(emit
);
9055 const struct tgsi_full_dst_register cond2_dst
= make_dst_temp_reg(tmp2
);
9056 const struct tgsi_full_dst_register cond2_dst_x
=
9057 writemask_dst(&cond2_dst
, TGSI_WRITEMASK_X
);
9058 const struct tgsi_full_src_register cond2_src_x
=
9059 make_src_scalar_reg(TGSI_FILE_TEMPORARY
, tmp2
, TGSI_SWIZZLE_X
);
9062 * In SM5, when width = 32 and offset = 0, it returns 0.
9063 * On the other hand GLSL, expects insert to be copied as it is, to dst.
9066 /* cond1 = width == 32 */
9067 emit_instruction_op2(emit
, VGPU10_OPCODE_IEQ
,
9068 &cond1_dst_x
, &inst
->Src
[3], &imm32
);
9070 /* cond1 = offset == 0 */
9071 emit_instruction_op2(emit
, VGPU10_OPCODE_IEQ
,
9072 &cond2_dst_x
, &inst
->Src
[2], &zero
);
9074 /* cond2 = cond1 & cond2 */
9075 emit_instruction_op2(emit
, VGPU10_OPCODE_AND
,
9076 &cond2_dst_x
, &cond2_src_x
, &cond1_src_x
);
9079 emit_if(emit
, &cond2_src_x
);
9081 emit_instruction_op1(emit
, VGPU10_OPCODE_MOV
, &inst
->Dst
[0],
9085 emit_instruction_op0(emit
, VGPU10_OPCODE_ELSE
);
9087 /* BFI dst, width, offset, insert, base */
9088 begin_emit_instruction(emit
);
9089 emit_opcode(emit
, translate_opcode(opcode
), inst
->Instruction
.Saturate
);
9090 emit_dst_register(emit
, &inst
->Dst
[0]);
9091 emit_src_register(emit
, &inst
->Src
[3]);
9092 emit_src_register(emit
, &inst
->Src
[2]);
9093 emit_src_register(emit
, &inst
->Src
[1]);
9094 emit_src_register(emit
, &inst
->Src
[0]);
9095 end_emit_instruction(emit
);
9098 emit_instruction_op0(emit
, VGPU10_OPCODE_ENDIF
);
9100 free_temp_indexes(emit
);
9106 * We only special case the MOV instruction to try to detect constant
9107 * color writes in the fragment shader.
9110 emit_mov(struct svga_shader_emitter_v10
*emit
,
9111 const struct tgsi_full_instruction
*inst
)
9113 const struct tgsi_full_src_register
*src
= &inst
->Src
[0];
9114 const struct tgsi_full_dst_register
*dst
= &inst
->Dst
[0];
9116 if (emit
->unit
== PIPE_SHADER_FRAGMENT
&&
9117 dst
->Register
.File
== TGSI_FILE_OUTPUT
&&
9118 dst
->Register
.Index
== 0 &&
9119 src
->Register
.File
== TGSI_FILE_CONSTANT
&&
9120 !src
->Register
.Indirect
) {
9121 emit
->constant_color_output
= TRUE
;
9124 return emit_simple(emit
, inst
);
9129 * Emit a simple VGPU10 instruction which writes to multiple dest registers,
9130 * where TGSI only uses one dest register.
9133 emit_simple_1dst(struct svga_shader_emitter_v10
*emit
,
9134 const struct tgsi_full_instruction
*inst
,
9138 const enum tgsi_opcode opcode
= inst
->Instruction
.Opcode
;
9139 const struct tgsi_opcode_info
*op
= tgsi_get_opcode_info(opcode
);
9142 begin_emit_instruction(emit
);
9143 emit_opcode(emit
, translate_opcode(opcode
), inst
->Instruction
.Saturate
);
9145 for (i
= 0; i
< dst_count
; i
++) {
9146 if (i
== dst_index
) {
9147 emit_dst_register(emit
, &inst
->Dst
[0]);
9149 emit_null_dst_register(emit
);
9153 for (i
= 0; i
< op
->num_src
; i
++) {
9154 emit_src_register(emit
, &inst
->Src
[i
]);
9156 end_emit_instruction(emit
);
9163 * Emit a vmware specific VGPU10 instruction.
9166 emit_vmware(struct svga_shader_emitter_v10
*emit
,
9167 const struct tgsi_full_instruction
*inst
,
9168 VGPU10_VMWARE_OPCODE_TYPE subopcode
)
9170 VGPU10OpcodeToken0 token0
;
9171 const enum tgsi_opcode opcode
= inst
->Instruction
.Opcode
;
9172 const struct tgsi_opcode_info
*op
= tgsi_get_opcode_info(opcode
);
9173 const bool dbl_dst
= opcode_has_dbl_dst(inst
->Instruction
.Opcode
);
9174 const bool dbl_src
= opcode_has_dbl_src(inst
->Instruction
.Opcode
);
9178 begin_emit_instruction(emit
);
9180 assert((subopcode
> 0 && emit
->version
>= 50) || subopcode
== 0);
9183 token0
.opcodeType
= VGPU10_OPCODE_VMWARE
;
9184 token0
.vmwareOpcodeType
= subopcode
;
9185 emit_dword(emit
, token0
.value
);
9187 if (subopcode
== VGPU10_VMWARE_OPCODE_IDIV
) {
9188 /* IDIV only uses the first dest register. */
9189 emit_dst_register(emit
, &inst
->Dst
[0]);
9190 emit_null_dst_register(emit
);
9192 for (i
= 0; i
< op
->num_dst
; i
++) {
9194 check_double_dst_writemask(inst
);
9196 emit_dst_register(emit
, &inst
->Dst
[i
]);
9200 for (i
= 0; i
< op
->num_src
; i
++) {
9202 check_double_src_swizzle(&inst
->Src
[i
]);
9204 emit_src_register(emit
, &inst
->Src
[i
]);
9206 end_emit_instruction(emit
);
9213 * Translate a single TGSI instruction to VGPU10.
9216 emit_vgpu10_instruction(struct svga_shader_emitter_v10
*emit
,
9217 unsigned inst_number
,
9218 const struct tgsi_full_instruction
*inst
)
9220 const enum tgsi_opcode opcode
= inst
->Instruction
.Opcode
;
9222 if (emit
->skip_instruction
)
9226 case TGSI_OPCODE_ADD
:
9227 case TGSI_OPCODE_AND
:
9228 case TGSI_OPCODE_BGNLOOP
:
9229 case TGSI_OPCODE_BRK
:
9230 case TGSI_OPCODE_CEIL
:
9231 case TGSI_OPCODE_CONT
:
9232 case TGSI_OPCODE_DDX
:
9233 case TGSI_OPCODE_DDY
:
9234 case TGSI_OPCODE_DIV
:
9235 case TGSI_OPCODE_DP2
:
9236 case TGSI_OPCODE_DP3
:
9237 case TGSI_OPCODE_DP4
:
9238 case TGSI_OPCODE_ELSE
:
9239 case TGSI_OPCODE_ENDIF
:
9240 case TGSI_OPCODE_ENDLOOP
:
9241 case TGSI_OPCODE_ENDSUB
:
9242 case TGSI_OPCODE_F2I
:
9243 case TGSI_OPCODE_F2U
:
9244 case TGSI_OPCODE_FLR
:
9245 case TGSI_OPCODE_FRC
:
9246 case TGSI_OPCODE_FSEQ
:
9247 case TGSI_OPCODE_FSGE
:
9248 case TGSI_OPCODE_FSLT
:
9249 case TGSI_OPCODE_FSNE
:
9250 case TGSI_OPCODE_I2F
:
9251 case TGSI_OPCODE_IMAX
:
9252 case TGSI_OPCODE_IMIN
:
9253 case TGSI_OPCODE_INEG
:
9254 case TGSI_OPCODE_ISGE
:
9255 case TGSI_OPCODE_ISHR
:
9256 case TGSI_OPCODE_ISLT
:
9257 case TGSI_OPCODE_MAD
:
9258 case TGSI_OPCODE_MAX
:
9259 case TGSI_OPCODE_MIN
:
9260 case TGSI_OPCODE_MUL
:
9261 case TGSI_OPCODE_NOP
:
9262 case TGSI_OPCODE_NOT
:
9263 case TGSI_OPCODE_OR
:
9264 case TGSI_OPCODE_UADD
:
9265 case TGSI_OPCODE_USEQ
:
9266 case TGSI_OPCODE_USGE
:
9267 case TGSI_OPCODE_USLT
:
9268 case TGSI_OPCODE_UMIN
:
9269 case TGSI_OPCODE_UMAD
:
9270 case TGSI_OPCODE_UMAX
:
9271 case TGSI_OPCODE_ROUND
:
9272 case TGSI_OPCODE_SQRT
:
9273 case TGSI_OPCODE_SHL
:
9274 case TGSI_OPCODE_TRUNC
:
9275 case TGSI_OPCODE_U2F
:
9276 case TGSI_OPCODE_UCMP
:
9277 case TGSI_OPCODE_USHR
:
9278 case TGSI_OPCODE_USNE
:
9279 case TGSI_OPCODE_XOR
:
9280 /* Begin SM5 opcodes */
9281 case TGSI_OPCODE_F2D
:
9282 case TGSI_OPCODE_D2F
:
9283 case TGSI_OPCODE_DADD
:
9284 case TGSI_OPCODE_DMUL
:
9285 case TGSI_OPCODE_DMAX
:
9286 case TGSI_OPCODE_DMIN
:
9287 case TGSI_OPCODE_DSGE
:
9288 case TGSI_OPCODE_DSLT
:
9289 case TGSI_OPCODE_DSEQ
:
9290 case TGSI_OPCODE_DSNE
:
9291 case TGSI_OPCODE_BREV
:
9292 case TGSI_OPCODE_POPC
:
9293 case TGSI_OPCODE_LSB
:
9294 case TGSI_OPCODE_INTERP_CENTROID
:
9295 case TGSI_OPCODE_INTERP_SAMPLE
:
9296 /* simple instructions */
9297 return emit_simple(emit
, inst
);
9298 case TGSI_OPCODE_RET
:
9299 if (emit
->unit
== PIPE_SHADER_TESS_CTRL
&&
9300 !emit
->tcs
.control_point_phase
) {
9302 /* store the tessellation levels in the patch constant phase only */
9303 store_tesslevels(emit
);
9305 return emit_simple(emit
, inst
);
9307 case TGSI_OPCODE_IMSB
:
9308 case TGSI_OPCODE_UMSB
:
9309 return emit_msb(emit
, inst
);
9310 case TGSI_OPCODE_IBFE
:
9311 case TGSI_OPCODE_UBFE
:
9312 return emit_bfe(emit
, inst
);
9313 case TGSI_OPCODE_BFI
:
9314 return emit_bfi(emit
, inst
);
9315 case TGSI_OPCODE_MOV
:
9316 return emit_mov(emit
, inst
);
9317 case TGSI_OPCODE_EMIT
:
9318 return emit_vertex(emit
, inst
);
9319 case TGSI_OPCODE_ENDPRIM
:
9320 return emit_endprim(emit
, inst
);
9321 case TGSI_OPCODE_IABS
:
9322 return emit_iabs(emit
, inst
);
9323 case TGSI_OPCODE_ARL
:
9325 case TGSI_OPCODE_UARL
:
9326 return emit_arl_uarl(emit
, inst
);
9327 case TGSI_OPCODE_BGNSUB
:
9330 case TGSI_OPCODE_CAL
:
9331 return emit_cal(emit
, inst
);
9332 case TGSI_OPCODE_CMP
:
9333 return emit_cmp(emit
, inst
);
9334 case TGSI_OPCODE_COS
:
9335 return emit_sincos(emit
, inst
);
9336 case TGSI_OPCODE_DST
:
9337 return emit_dst(emit
, inst
);
9338 case TGSI_OPCODE_EX2
:
9339 return emit_ex2(emit
, inst
);
9340 case TGSI_OPCODE_EXP
:
9341 return emit_exp(emit
, inst
);
9342 case TGSI_OPCODE_IF
:
9343 return emit_if(emit
, &inst
->Src
[0]);
9344 case TGSI_OPCODE_KILL
:
9345 return emit_kill(emit
, inst
);
9346 case TGSI_OPCODE_KILL_IF
:
9347 return emit_kill_if(emit
, inst
);
9348 case TGSI_OPCODE_LG2
:
9349 return emit_lg2(emit
, inst
);
9350 case TGSI_OPCODE_LIT
:
9351 return emit_lit(emit
, inst
);
9352 case TGSI_OPCODE_LODQ
:
9353 return emit_lodq(emit
, inst
);
9354 case TGSI_OPCODE_LOG
:
9355 return emit_log(emit
, inst
);
9356 case TGSI_OPCODE_LRP
:
9357 return emit_lrp(emit
, inst
);
9358 case TGSI_OPCODE_POW
:
9359 return emit_pow(emit
, inst
);
9360 case TGSI_OPCODE_RCP
:
9361 return emit_rcp(emit
, inst
);
9362 case TGSI_OPCODE_RSQ
:
9363 return emit_rsq(emit
, inst
);
9364 case TGSI_OPCODE_SAMPLE
:
9365 return emit_sample(emit
, inst
);
9366 case TGSI_OPCODE_SEQ
:
9367 return emit_seq(emit
, inst
);
9368 case TGSI_OPCODE_SGE
:
9369 return emit_sge(emit
, inst
);
9370 case TGSI_OPCODE_SGT
:
9371 return emit_sgt(emit
, inst
);
9372 case TGSI_OPCODE_SIN
:
9373 return emit_sincos(emit
, inst
);
9374 case TGSI_OPCODE_SLE
:
9375 return emit_sle(emit
, inst
);
9376 case TGSI_OPCODE_SLT
:
9377 return emit_slt(emit
, inst
);
9378 case TGSI_OPCODE_SNE
:
9379 return emit_sne(emit
, inst
);
9380 case TGSI_OPCODE_SSG
:
9381 return emit_ssg(emit
, inst
);
9382 case TGSI_OPCODE_ISSG
:
9383 return emit_issg(emit
, inst
);
9384 case TGSI_OPCODE_TEX
:
9385 return emit_tex(emit
, inst
);
9386 case TGSI_OPCODE_TG4
:
9387 return emit_tg4(emit
, inst
);
9388 case TGSI_OPCODE_TEX2
:
9389 return emit_tex2(emit
, inst
);
9390 case TGSI_OPCODE_TXP
:
9391 return emit_txp(emit
, inst
);
9392 case TGSI_OPCODE_TXB
:
9393 case TGSI_OPCODE_TXB2
:
9394 case TGSI_OPCODE_TXL
:
9395 return emit_txl_txb(emit
, inst
);
9396 case TGSI_OPCODE_TXD
:
9397 return emit_txd(emit
, inst
);
9398 case TGSI_OPCODE_TXF
:
9399 return emit_txf(emit
, inst
);
9400 case TGSI_OPCODE_TXL2
:
9401 return emit_txl2(emit
, inst
);
9402 case TGSI_OPCODE_TXQ
:
9403 return emit_txq(emit
, inst
);
9404 case TGSI_OPCODE_UIF
:
9405 return emit_if(emit
, &inst
->Src
[0]);
9406 case TGSI_OPCODE_UMUL_HI
:
9407 case TGSI_OPCODE_IMUL_HI
:
9408 case TGSI_OPCODE_UDIV
:
9409 /* These cases use only the FIRST of two destination registers */
9410 return emit_simple_1dst(emit
, inst
, 2, 0);
9411 case TGSI_OPCODE_IDIV
:
9412 return emit_vmware(emit
, inst
, VGPU10_VMWARE_OPCODE_IDIV
);
9413 case TGSI_OPCODE_UMUL
:
9414 case TGSI_OPCODE_UMOD
:
9415 case TGSI_OPCODE_MOD
:
9416 /* These cases use only the SECOND of two destination registers */
9417 return emit_simple_1dst(emit
, inst
, 2, 1);
9419 /* Begin SM5 opcodes */
9420 case TGSI_OPCODE_DABS
:
9421 return emit_dabs(emit
, inst
);
9422 case TGSI_OPCODE_DNEG
:
9423 return emit_dneg(emit
, inst
);
9424 case TGSI_OPCODE_DRCP
:
9425 return emit_simple(emit
, inst
);
9426 case TGSI_OPCODE_DSQRT
:
9427 return emit_dsqrt(emit
, inst
);
9428 case TGSI_OPCODE_DMAD
:
9429 return emit_dmad(emit
, inst
);
9430 case TGSI_OPCODE_DFRAC
:
9431 return emit_vmware(emit
, inst
, VGPU10_VMWARE_OPCODE_DFRC
);
9432 case TGSI_OPCODE_D2I
:
9433 case TGSI_OPCODE_D2U
:
9434 return emit_simple(emit
, inst
);
9435 case TGSI_OPCODE_I2D
:
9436 case TGSI_OPCODE_U2D
:
9437 return emit_simple(emit
, inst
);
9438 case TGSI_OPCODE_DRSQ
:
9439 return emit_drsq(emit
, &inst
->Dst
[0], &inst
->Src
[0]);
9440 case TGSI_OPCODE_DDIV
:
9441 return emit_simple(emit
, inst
);
9442 case TGSI_OPCODE_INTERP_OFFSET
:
9443 return emit_interp_offset(emit
, inst
);
9445 /* The following opcodes should never be seen here. We return zero
9446 * for all the PIPE_CAP_TGSI_DROUND_SUPPORTED, DFRACEXP_DLDEXP_SUPPORTED,
9447 * FMA_SUPPORTED, LDEXP_SUPPORTED queries.
9449 case TGSI_OPCODE_FMA
:
9450 case TGSI_OPCODE_LDEXP
:
9451 case TGSI_OPCODE_DSSG
:
9452 case TGSI_OPCODE_DFRACEXP
:
9453 case TGSI_OPCODE_DLDEXP
:
9454 case TGSI_OPCODE_DTRUNC
:
9455 case TGSI_OPCODE_DCEIL
:
9456 case TGSI_OPCODE_DFLR
:
9457 debug_printf("Unexpected TGSI opcode %s. "
9458 "Should have been translated away by the GLSL compiler.\n",
9459 tgsi_get_opcode_name(opcode
));
9462 case TGSI_OPCODE_LOAD
:
9463 case TGSI_OPCODE_STORE
:
9464 case TGSI_OPCODE_ATOMAND
:
9465 case TGSI_OPCODE_ATOMCAS
:
9466 case TGSI_OPCODE_ATOMIMAX
:
9467 case TGSI_OPCODE_ATOMIMIN
:
9468 case TGSI_OPCODE_ATOMOR
:
9469 case TGSI_OPCODE_ATOMUADD
:
9470 case TGSI_OPCODE_ATOMUMAX
:
9471 case TGSI_OPCODE_ATOMUMIN
:
9472 case TGSI_OPCODE_ATOMXCHG
:
9473 case TGSI_OPCODE_ATOMXOR
:
9475 case TGSI_OPCODE_BARRIER
:
9476 if (emit
->unit
== PIPE_SHADER_TESS_CTRL
) {
9477 /* SM5 device doesn't support BARRIER in tcs . If barrier is used
9478 * in shader, don't do anything for this opcode and continue rest
9479 * of shader translation
9481 pipe_debug_message(&emit
->svga_debug_callback
, INFO
,
9482 "barrier instruction is not supported in tessellation control shader\n");
9486 return emit_simple(emit
, inst
);
9489 case TGSI_OPCODE_END
:
9490 if (!emit_post_helpers(emit
))
9492 return emit_simple(emit
, inst
);
9495 debug_printf("Unimplemented tgsi instruction %s\n",
9496 tgsi_get_opcode_name(opcode
));
9505 * Emit the extra instructions to adjust the vertex position.
9506 * There are two possible adjustments:
9507 * 1. Converting from Gallium to VGPU10 coordinate space by applying the
9508 * "prescale" and "pretranslate" values.
9509 * 2. Undoing the viewport transformation when we use the swtnl/draw path.
9510 * \param vs_pos_tmp_index which temporary register contains the vertex pos.
9513 emit_vpos_instructions(struct svga_shader_emitter_v10
*emit
)
9515 struct tgsi_full_src_register tmp_pos_src
;
9516 struct tgsi_full_dst_register pos_dst
;
9517 const unsigned vs_pos_tmp_index
= emit
->vposition
.tmp_index
;
9519 /* Don't bother to emit any extra vertex instructions if vertex position is
9522 if (emit
->vposition
.out_index
== INVALID_INDEX
)
9526 * Reset the temporary vertex position register index
9527 * so that emit_dst_register() will use the real vertex position output
9529 emit
->vposition
.tmp_index
= INVALID_INDEX
;
9531 tmp_pos_src
= make_src_temp_reg(vs_pos_tmp_index
);
9532 pos_dst
= make_dst_output_reg(emit
->vposition
.out_index
);
9534 /* If non-adjusted vertex position register index
9535 * is valid, copy the vertex position from the temporary
9536 * vertex position register before it is modified by the
9537 * prescale computation.
9539 if (emit
->vposition
.so_index
!= INVALID_INDEX
) {
9540 struct tgsi_full_dst_register pos_so_dst
=
9541 make_dst_output_reg(emit
->vposition
.so_index
);
9543 /* MOV pos_so, tmp_pos */
9544 emit_instruction_op1(emit
, VGPU10_OPCODE_MOV
, &pos_so_dst
, &tmp_pos_src
);
9547 if (emit
->vposition
.need_prescale
) {
9548 /* This code adjusts the vertex position to match the VGPU10 convention.
9549 * If p is the position computed by the shader (usually by applying the
9550 * modelview and projection matrices), the new position q is computed by:
9552 * q.x = p.w * trans.x + p.x * scale.x
9553 * q.y = p.w * trans.y + p.y * scale.y
9554 * q.z = p.w * trans.z + p.z * scale.z;
9555 * q.w = p.w * trans.w + p.w;
9557 struct tgsi_full_src_register tmp_pos_src_w
=
9558 scalar_src(&tmp_pos_src
, TGSI_SWIZZLE_W
);
9559 struct tgsi_full_dst_register tmp_pos_dst
=
9560 make_dst_temp_reg(vs_pos_tmp_index
);
9561 struct tgsi_full_dst_register tmp_pos_dst_xyz
=
9562 writemask_dst(&tmp_pos_dst
, TGSI_WRITEMASK_XYZ
);
9564 struct tgsi_full_src_register prescale_scale
=
9565 make_src_temp_reg(emit
->vposition
.prescale_scale_index
);
9566 struct tgsi_full_src_register prescale_trans
=
9567 make_src_temp_reg(emit
->vposition
.prescale_trans_index
);
9569 /* MUL tmp_pos.xyz, tmp_pos, prescale.scale */
9570 emit_instruction_op2(emit
, VGPU10_OPCODE_MUL
, &tmp_pos_dst_xyz
,
9571 &tmp_pos_src
, &prescale_scale
);
9573 /* MAD pos, tmp_pos.wwww, prescale.trans, tmp_pos */
9574 emit_instruction_op3(emit
, VGPU10_OPCODE_MAD
, &pos_dst
, &tmp_pos_src_w
,
9575 &prescale_trans
, &tmp_pos_src
);
9577 else if (emit
->key
.vs
.undo_viewport
) {
9578 /* This code computes the final vertex position from the temporary
9579 * vertex position by undoing the viewport transformation and the
9580 * divide-by-W operation (we convert window coords back to clip coords).
9581 * This is needed when we use the 'draw' module for fallbacks.
9582 * If p is the temp pos in window coords, then the NDC coord q is:
9583 * q.x = (p.x - vp.x_trans) / vp.x_scale * p.w
9584 * q.y = (p.y - vp.y_trans) / vp.y_scale * p.w
9587 * CONST[vs_viewport_index] contains:
9588 * { 1/vp.x_scale, 1/vp.y_scale, -vp.x_trans, -vp.y_trans }
9590 struct tgsi_full_dst_register tmp_pos_dst
=
9591 make_dst_temp_reg(vs_pos_tmp_index
);
9592 struct tgsi_full_dst_register tmp_pos_dst_xy
=
9593 writemask_dst(&tmp_pos_dst
, TGSI_WRITEMASK_XY
);
9594 struct tgsi_full_src_register tmp_pos_src_wwww
=
9595 scalar_src(&tmp_pos_src
, TGSI_SWIZZLE_W
);
9597 struct tgsi_full_dst_register pos_dst_xyz
=
9598 writemask_dst(&pos_dst
, TGSI_WRITEMASK_XYZ
);
9599 struct tgsi_full_dst_register pos_dst_w
=
9600 writemask_dst(&pos_dst
, TGSI_WRITEMASK_W
);
9602 struct tgsi_full_src_register vp_xyzw
=
9603 make_src_const_reg(emit
->vs
.viewport_index
);
9604 struct tgsi_full_src_register vp_zwww
=
9605 swizzle_src(&vp_xyzw
, TGSI_SWIZZLE_Z
, TGSI_SWIZZLE_W
,
9606 TGSI_SWIZZLE_W
, TGSI_SWIZZLE_W
);
9608 /* ADD tmp_pos.xy, tmp_pos.xy, viewport.zwww */
9609 emit_instruction_op2(emit
, VGPU10_OPCODE_ADD
, &tmp_pos_dst_xy
,
9610 &tmp_pos_src
, &vp_zwww
);
9612 /* MUL tmp_pos.xy, tmp_pos.xyzw, viewport.xyzy */
9613 emit_instruction_op2(emit
, VGPU10_OPCODE_MUL
, &tmp_pos_dst_xy
,
9614 &tmp_pos_src
, &vp_xyzw
);
9616 /* MUL pos.xyz, tmp_pos.xyz, tmp_pos.www */
9617 emit_instruction_op2(emit
, VGPU10_OPCODE_MUL
, &pos_dst_xyz
,
9618 &tmp_pos_src
, &tmp_pos_src_wwww
);
9620 /* MOV pos.w, tmp_pos.w */
9621 emit_instruction_op1(emit
, VGPU10_OPCODE_MOV
, &pos_dst_w
, &tmp_pos_src
);
9623 else if (vs_pos_tmp_index
!= INVALID_INDEX
) {
9624 /* This code is to handle the case where the temporary vertex
9625 * position register is created when the vertex shader has stream
9626 * output and prescale is disabled because rasterization is to be
9629 struct tgsi_full_dst_register pos_dst
=
9630 make_dst_output_reg(emit
->vposition
.out_index
);
9632 /* MOV pos, tmp_pos */
9633 begin_emit_instruction(emit
);
9634 emit_opcode(emit
, VGPU10_OPCODE_MOV
, FALSE
);
9635 emit_dst_register(emit
, &pos_dst
);
9636 emit_src_register(emit
, &tmp_pos_src
);
9637 end_emit_instruction(emit
);
9640 /* Restore original vposition.tmp_index value for the next GS vertex.
9641 * It doesn't matter for VS.
9643 emit
->vposition
.tmp_index
= vs_pos_tmp_index
;
9647 emit_clipping_instructions(struct svga_shader_emitter_v10
*emit
)
9649 if (emit
->clip_mode
== CLIP_DISTANCE
) {
9650 /* Copy from copy distance temporary to CLIPDIST & the shadow copy */
9651 emit_clip_distance_instructions(emit
);
9653 } else if (emit
->clip_mode
== CLIP_VERTEX
&&
9654 emit
->key
.last_vertex_stage
) {
9655 /* Convert TGSI CLIPVERTEX to CLIPDIST */
9656 emit_clip_vertex_instructions(emit
);
9660 * Emit vertex position and take care of legacy user planes only if
9661 * there is a valid vertex position register index.
9662 * This is to take care of the case
9663 * where the shader doesn't output vertex position. Then in
9664 * this case, don't bother to emit more vertex instructions.
9666 if (emit
->vposition
.out_index
== INVALID_INDEX
)
9670 * Emit per-vertex clipping instructions for legacy user defined clip planes.
9671 * NOTE: we must emit the clip distance instructions before the
9672 * emit_vpos_instructions() call since the later function will change
9673 * the TEMP[vs_pos_tmp_index] value.
9675 if (emit
->clip_mode
== CLIP_LEGACY
&& emit
->key
.last_vertex_stage
) {
9676 /* Emit CLIPDIST for legacy user defined clip planes */
9677 emit_clip_distance_from_vpos(emit
, emit
->vposition
.tmp_index
);
/**
 * Emit extra per-vertex instructions.  This includes clip-coordinate
 * space conversion and computing clip distances.  This is called for
 * each GS emit-vertex instruction and at the end of VS translation.
 *
 * \param emit  the shader emitter state
 */
static void
emit_vertex_instructions(struct svga_shader_emitter_v10 *emit)
{
   /* Emit clipping instructions based on clipping mode.  This must come
    * first: emit_vpos_instructions() modifies the temporary position
    * register that the clipping code reads.
    */
   emit_clipping_instructions(emit);

   /* Emit vertex position instructions */
   emit_vpos_instructions(emit);
}
9699 * Translate the TGSI_OPCODE_EMIT GS instruction.
9702 emit_vertex(struct svga_shader_emitter_v10
*emit
,
9703 const struct tgsi_full_instruction
*inst
)
9705 unsigned ret
= TRUE
;
9707 assert(emit
->unit
== PIPE_SHADER_GEOMETRY
);
9710 * Emit the viewport array index for the first vertex.
9712 if (emit
->gs
.viewport_index_out_index
!= INVALID_INDEX
) {
9713 struct tgsi_full_dst_register viewport_index_out
=
9714 make_dst_output_reg(emit
->gs
.viewport_index_out_index
);
9715 struct tgsi_full_dst_register viewport_index_out_x
=
9716 writemask_dst(&viewport_index_out
, TGSI_WRITEMASK_X
);
9717 struct tgsi_full_src_register viewport_index_tmp
=
9718 make_src_temp_reg(emit
->gs
.viewport_index_tmp_index
);
9720 /* Set the out index to INVALID_INDEX, so it will not
9721 * be assigned to a temp again in emit_dst_register, and
9722 * the viewport index will not be assigned again in the
9723 * subsequent vertices.
9725 emit
->gs
.viewport_index_out_index
= INVALID_INDEX
;
9726 emit_instruction_op1(emit
, VGPU10_OPCODE_MOV
,
9727 &viewport_index_out_x
, &viewport_index_tmp
);
9731 * Find the stream index associated with this emit vertex instruction.
9733 assert(inst
->Src
[0].Register
.File
== TGSI_FILE_IMMEDIATE
);
9734 unsigned streamIndex
= find_stream_index(emit
, &inst
->Src
[0]);
9737 * According to the ARB_gpu_shader5 spec, the built-in geometry shader
9738 * outputs are always associated with vertex stream zero.
9739 * So emit the extra vertex instructions for position or clip distance
9740 * for stream zero only.
9742 if (streamIndex
== 0) {
9744 * Before emitting vertex instructions, emit the temporaries for
9745 * the prescale constants based on the viewport index if needed.
9747 if (emit
->vposition
.need_prescale
&& !emit
->vposition
.have_prescale
)
9748 emit_temp_prescale_instructions(emit
);
9750 emit_vertex_instructions(emit
);
9753 begin_emit_instruction(emit
);
9754 if (emit
->version
>= 50) {
9755 if (emit
->info
.num_stream_output_components
[streamIndex
] == 0) {
9757 * If there is no output for this stream, discard this instruction.
9759 emit
->discard_instruction
= TRUE
;
9762 emit_opcode(emit
, VGPU10_OPCODE_EMIT_STREAM
, FALSE
);
9763 emit_stream_register(emit
, streamIndex
);
9767 emit_opcode(emit
, VGPU10_OPCODE_EMIT
, FALSE
);
9769 end_emit_instruction(emit
);
9776 * Emit the extra code to convert from VGPU10's boolean front-face
9777 * register to TGSI's signed front-face register.
9779 * TODO: Make temporary front-face register a scalar.
9782 emit_frontface_instructions(struct svga_shader_emitter_v10
*emit
)
9784 assert(emit
->unit
== PIPE_SHADER_FRAGMENT
);
9786 if (emit
->fs
.face_input_index
!= INVALID_INDEX
) {
9787 /* convert vgpu10 boolean face register to gallium +/-1 value */
9788 struct tgsi_full_dst_register tmp_dst
=
9789 make_dst_temp_reg(emit
->fs
.face_tmp_index
);
9790 struct tgsi_full_src_register one
=
9791 make_immediate_reg_float(emit
, 1.0f
);
9792 struct tgsi_full_src_register neg_one
=
9793 make_immediate_reg_float(emit
, -1.0f
);
9795 /* MOVC face_tmp, IS_FRONT_FACE.x, 1.0, -1.0 */
9796 begin_emit_instruction(emit
);
9797 emit_opcode(emit
, VGPU10_OPCODE_MOVC
, FALSE
);
9798 emit_dst_register(emit
, &tmp_dst
);
9799 emit_face_register(emit
);
9800 emit_src_register(emit
, &one
);
9801 emit_src_register(emit
, &neg_one
);
9802 end_emit_instruction(emit
);
9808 * Emit the extra code to convert from VGPU10's fragcoord.w value to 1/w.
9811 emit_fragcoord_instructions(struct svga_shader_emitter_v10
*emit
)
9813 assert(emit
->unit
== PIPE_SHADER_FRAGMENT
);
9815 if (emit
->fs
.fragcoord_input_index
!= INVALID_INDEX
) {
9816 struct tgsi_full_dst_register tmp_dst
=
9817 make_dst_temp_reg(emit
->fs
.fragcoord_tmp_index
);
9818 struct tgsi_full_dst_register tmp_dst_xyz
=
9819 writemask_dst(&tmp_dst
, TGSI_WRITEMASK_XYZ
);
9820 struct tgsi_full_dst_register tmp_dst_w
=
9821 writemask_dst(&tmp_dst
, TGSI_WRITEMASK_W
);
9822 struct tgsi_full_src_register one
=
9823 make_immediate_reg_float(emit
, 1.0f
);
9824 struct tgsi_full_src_register fragcoord
=
9825 make_src_reg(TGSI_FILE_INPUT
, emit
->fs
.fragcoord_input_index
);
9827 /* save the input index */
9828 unsigned fragcoord_input_index
= emit
->fs
.fragcoord_input_index
;
9829 /* set to invalid to prevent substitution in emit_src_register() */
9830 emit
->fs
.fragcoord_input_index
= INVALID_INDEX
;
9832 /* MOV fragcoord_tmp.xyz, fragcoord.xyz */
9833 begin_emit_instruction(emit
);
9834 emit_opcode(emit
, VGPU10_OPCODE_MOV
, FALSE
);
9835 emit_dst_register(emit
, &tmp_dst_xyz
);
9836 emit_src_register(emit
, &fragcoord
);
9837 end_emit_instruction(emit
);
9839 /* DIV fragcoord_tmp.w, 1.0, fragcoord.w */
9840 begin_emit_instruction(emit
);
9841 emit_opcode(emit
, VGPU10_OPCODE_DIV
, FALSE
);
9842 emit_dst_register(emit
, &tmp_dst_w
);
9843 emit_src_register(emit
, &one
);
9844 emit_src_register(emit
, &fragcoord
);
9845 end_emit_instruction(emit
);
9847 /* restore saved value */
9848 emit
->fs
.fragcoord_input_index
= fragcoord_input_index
;
9854 * Emit the extra code to get the current sample position value and
9855 * put it into a temp register.
9858 emit_sample_position_instructions(struct svga_shader_emitter_v10
*emit
)
9860 assert(emit
->unit
== PIPE_SHADER_FRAGMENT
);
9862 if (emit
->fs
.sample_pos_sys_index
!= INVALID_INDEX
) {
9863 assert(emit
->version
>= 41);
9865 struct tgsi_full_dst_register tmp_dst
=
9866 make_dst_temp_reg(emit
->fs
.sample_pos_tmp_index
);
9867 struct tgsi_full_src_register half
=
9868 make_immediate_reg_float4(emit
, 0.5, 0.5, 0.0, 0.0);
9870 struct tgsi_full_src_register tmp_src
=
9871 make_src_temp_reg(emit
->fs
.sample_pos_tmp_index
);
9872 struct tgsi_full_src_register sample_index_reg
=
9873 make_src_scalar_reg(TGSI_FILE_SYSTEM_VALUE
,
9874 emit
->fs
.sample_id_sys_index
, TGSI_SWIZZLE_X
);
9876 /* The first src register is a shader resource (if we want a
9877 * multisampled resource sample position) or the rasterizer register
9878 * (if we want the current sample position in the color buffer). We
9882 /* SAMPLE_POS dst, RASTERIZER, sampleIndex */
9883 begin_emit_instruction(emit
);
9884 emit_opcode(emit
, VGPU10_OPCODE_SAMPLE_POS
, FALSE
);
9885 emit_dst_register(emit
, &tmp_dst
);
9886 emit_rasterizer_register(emit
);
9887 emit_src_register(emit
, &sample_index_reg
);
9888 end_emit_instruction(emit
);
9890 /* Convert from D3D coords to GL coords by adding 0.5 bias */
9891 /* ADD dst, dst, half */
9892 begin_emit_instruction(emit
);
9893 emit_opcode(emit
, VGPU10_OPCODE_ADD
, FALSE
);
9894 emit_dst_register(emit
, &tmp_dst
);
9895 emit_src_register(emit
, &tmp_src
);
9896 emit_src_register(emit
, &half
);
9897 end_emit_instruction(emit
);
9903 * Emit extra instructions to adjust VS inputs/attributes. This can
9904 * mean casting a vertex attribute from int to float or setting the
9905 * W component to 1, or both.
9908 emit_vertex_attrib_instructions(struct svga_shader_emitter_v10
*emit
)
9910 const unsigned save_w_1_mask
= emit
->key
.vs
.adjust_attrib_w_1
;
9911 const unsigned save_itof_mask
= emit
->key
.vs
.adjust_attrib_itof
;
9912 const unsigned save_utof_mask
= emit
->key
.vs
.adjust_attrib_utof
;
9913 const unsigned save_is_bgra_mask
= emit
->key
.vs
.attrib_is_bgra
;
9914 const unsigned save_puint_to_snorm_mask
= emit
->key
.vs
.attrib_puint_to_snorm
;
9915 const unsigned save_puint_to_uscaled_mask
= emit
->key
.vs
.attrib_puint_to_uscaled
;
9916 const unsigned save_puint_to_sscaled_mask
= emit
->key
.vs
.attrib_puint_to_sscaled
;
9918 unsigned adjust_mask
= (save_w_1_mask
|
9922 save_puint_to_snorm_mask
|
9923 save_puint_to_uscaled_mask
|
9924 save_puint_to_sscaled_mask
);
9926 assert(emit
->unit
== PIPE_SHADER_VERTEX
);
9929 struct tgsi_full_src_register one
=
9930 make_immediate_reg_float(emit
, 1.0f
);
9932 struct tgsi_full_src_register one_int
=
9933 make_immediate_reg_int(emit
, 1);
9935 /* We need to turn off these bitmasks while emitting the
9936 * instructions below, then restore them afterward.
9938 emit
->key
.vs
.adjust_attrib_w_1
= 0;
9939 emit
->key
.vs
.adjust_attrib_itof
= 0;
9940 emit
->key
.vs
.adjust_attrib_utof
= 0;
9941 emit
->key
.vs
.attrib_is_bgra
= 0;
9942 emit
->key
.vs
.attrib_puint_to_snorm
= 0;
9943 emit
->key
.vs
.attrib_puint_to_uscaled
= 0;
9944 emit
->key
.vs
.attrib_puint_to_sscaled
= 0;
9946 while (adjust_mask
) {
9947 unsigned index
= u_bit_scan(&adjust_mask
);
9949 /* skip the instruction if this vertex attribute is not being used */
9950 if (emit
->info
.input_usage_mask
[index
] == 0)
9953 unsigned tmp
= emit
->vs
.adjusted_input
[index
];
9954 struct tgsi_full_src_register input_src
=
9955 make_src_reg(TGSI_FILE_INPUT
, index
);
9957 struct tgsi_full_dst_register tmp_dst
= make_dst_temp_reg(tmp
);
9958 struct tgsi_full_src_register tmp_src
= make_src_temp_reg(tmp
);
9959 struct tgsi_full_dst_register tmp_dst_w
=
9960 writemask_dst(&tmp_dst
, TGSI_WRITEMASK_W
);
9962 /* ITOF/UTOF/MOV tmp, input[index] */
9963 if (save_itof_mask
& (1 << index
)) {
9964 emit_instruction_op1(emit
, VGPU10_OPCODE_ITOF
,
9965 &tmp_dst
, &input_src
);
9967 else if (save_utof_mask
& (1 << index
)) {
9968 emit_instruction_op1(emit
, VGPU10_OPCODE_UTOF
,
9969 &tmp_dst
, &input_src
);
9971 else if (save_puint_to_snorm_mask
& (1 << index
)) {
9972 emit_puint_to_snorm(emit
, &tmp_dst
, &input_src
);
9974 else if (save_puint_to_uscaled_mask
& (1 << index
)) {
9975 emit_puint_to_uscaled(emit
, &tmp_dst
, &input_src
);
9977 else if (save_puint_to_sscaled_mask
& (1 << index
)) {
9978 emit_puint_to_sscaled(emit
, &tmp_dst
, &input_src
);
9981 assert((save_w_1_mask
| save_is_bgra_mask
) & (1 << index
));
9982 emit_instruction_op1(emit
, VGPU10_OPCODE_MOV
,
9983 &tmp_dst
, &input_src
);
9986 if (save_is_bgra_mask
& (1 << index
)) {
9987 emit_swap_r_b(emit
, &tmp_dst
, &tmp_src
);
9990 if (save_w_1_mask
& (1 << index
)) {
9991 /* MOV tmp.w, 1.0 */
9992 if (emit
->key
.vs
.attrib_is_pure_int
& (1 << index
)) {
9993 emit_instruction_op1(emit
, VGPU10_OPCODE_MOV
,
9994 &tmp_dst_w
, &one_int
);
9997 emit_instruction_op1(emit
, VGPU10_OPCODE_MOV
,
10003 emit
->key
.vs
.adjust_attrib_w_1
= save_w_1_mask
;
10004 emit
->key
.vs
.adjust_attrib_itof
= save_itof_mask
;
10005 emit
->key
.vs
.adjust_attrib_utof
= save_utof_mask
;
10006 emit
->key
.vs
.attrib_is_bgra
= save_is_bgra_mask
;
10007 emit
->key
.vs
.attrib_puint_to_snorm
= save_puint_to_snorm_mask
;
10008 emit
->key
.vs
.attrib_puint_to_uscaled
= save_puint_to_uscaled_mask
;
10009 emit
->key
.vs
.attrib_puint_to_sscaled
= save_puint_to_sscaled_mask
;
10014 /* Find zero-value immedate for default layer index */
10016 emit_default_layer_instructions(struct svga_shader_emitter_v10
*emit
)
10018 assert(emit
->unit
== PIPE_SHADER_FRAGMENT
);
10020 /* immediate for default layer index 0 */
10021 if (emit
->fs
.layer_input_index
!= INVALID_INDEX
) {
10022 union tgsi_immediate_data imm
;
10024 emit
->fs
.layer_imm_index
= find_immediate(emit
, imm
, 0);
10030 emit_temp_prescale_from_cbuf(struct svga_shader_emitter_v10
*emit
,
10031 unsigned cbuf_index
,
10032 struct tgsi_full_dst_register
*scale
,
10033 struct tgsi_full_dst_register
*translate
)
10035 struct tgsi_full_src_register scale_cbuf
= make_src_const_reg(cbuf_index
);
10036 struct tgsi_full_src_register trans_cbuf
= make_src_const_reg(cbuf_index
+1);
10038 emit_instruction_op1(emit
, VGPU10_OPCODE_MOV
, scale
, &scale_cbuf
);
10039 emit_instruction_op1(emit
, VGPU10_OPCODE_MOV
, translate
, &trans_cbuf
);
10044 * A recursive helper function to find the prescale from the constant buffer
10047 find_prescale_from_cbuf(struct svga_shader_emitter_v10
*emit
,
10048 unsigned index
, unsigned num_prescale
,
10049 struct tgsi_full_src_register
*vp_index
,
10050 struct tgsi_full_dst_register
*scale
,
10051 struct tgsi_full_dst_register
*translate
,
10052 struct tgsi_full_src_register
*tmp_src
,
10053 struct tgsi_full_dst_register
*tmp_dst
)
10055 if (num_prescale
== 0)
10060 emit_instruction_op0(emit
, VGPU10_OPCODE_ELSE
);
10063 struct tgsi_full_src_register index_src
=
10064 make_immediate_reg_int(emit
, index
);
10067 /* GE tmp, vp_index, index */
10068 emit_instruction_op2(emit
, VGPU10_OPCODE_GE
, tmp_dst
,
10069 vp_index
, &index_src
);
10071 /* EQ tmp, vp_index, index */
10072 emit_instruction_op2(emit
, VGPU10_OPCODE_EQ
, tmp_dst
,
10073 vp_index
, &index_src
);
10077 emit_if(emit
, tmp_src
);
10078 emit_temp_prescale_from_cbuf(emit
,
10079 emit
->vposition
.prescale_cbuf_index
+ 2 * index
,
10082 find_prescale_from_cbuf(emit
, index
+1, num_prescale
-1,
10083 vp_index
, scale
, translate
,
10087 emit_instruction_op0(emit
, VGPU10_OPCODE_ENDIF
);
10092 * This helper function emits instructions to set the prescale
10093 * and translate temporaries to the correct constants from the
10094 * constant buffer according to the designated viewport.
10097 emit_temp_prescale_instructions(struct svga_shader_emitter_v10
*emit
)
10099 struct tgsi_full_dst_register prescale_scale
=
10100 make_dst_temp_reg(emit
->vposition
.prescale_scale_index
);
10101 struct tgsi_full_dst_register prescale_translate
=
10102 make_dst_temp_reg(emit
->vposition
.prescale_trans_index
);
10104 unsigned prescale_cbuf_index
= emit
->vposition
.prescale_cbuf_index
;
10106 if (emit
->vposition
.num_prescale
== 1) {
10107 emit_temp_prescale_from_cbuf(emit
,
10108 prescale_cbuf_index
,
10109 &prescale_scale
, &prescale_translate
);
10112 * Since SM5 device does not support dynamic indexing, we need
10113 * to do the if-else to find the prescale constants for the
10114 * specified viewport.
10116 struct tgsi_full_src_register vp_index_src
=
10117 make_src_temp_reg(emit
->gs
.viewport_index_tmp_index
);
10119 struct tgsi_full_src_register vp_index_src_x
=
10120 scalar_src(&vp_index_src
, TGSI_SWIZZLE_X
);
10122 unsigned tmp
= get_temp_index(emit
);
10123 struct tgsi_full_src_register tmp_src
= make_src_temp_reg(tmp
);
10124 struct tgsi_full_src_register tmp_src_x
=
10125 scalar_src(&tmp_src
, TGSI_SWIZZLE_X
);
10126 struct tgsi_full_dst_register tmp_dst
= make_dst_temp_reg(tmp
);
10128 find_prescale_from_cbuf(emit
, 0, emit
->vposition
.num_prescale
,
10130 &prescale_scale
, &prescale_translate
,
10131 &tmp_src_x
, &tmp_dst
);
10134 /* Mark prescale temporaries are emitted */
10135 emit
->vposition
.have_prescale
= 1;
10140 * A helper function to emit an instruction in a vertex shader to add a bias
10141 * to the VertexID system value. This patches the VertexID in the SVGA vertex
10142 * shader to include the base vertex of an indexed primitive or the start index
10143 * of a non-indexed primitive.
10146 emit_vertex_id_nobase_instruction(struct svga_shader_emitter_v10
*emit
)
10148 struct tgsi_full_src_register vertex_id_bias_index
=
10149 make_src_const_reg(emit
->vs
.vertex_id_bias_index
);
10150 struct tgsi_full_src_register vertex_id_sys_src
=
10151 make_src_reg(TGSI_FILE_SYSTEM_VALUE
, emit
->vs
.vertex_id_sys_index
);
10152 struct tgsi_full_src_register vertex_id_sys_src_x
=
10153 scalar_src(&vertex_id_sys_src
, TGSI_SWIZZLE_X
);
10154 struct tgsi_full_dst_register vertex_id_tmp_dst
=
10155 make_dst_temp_reg(emit
->vs
.vertex_id_tmp_index
);
10157 /* IADD vertex_id_tmp, vertex_id_sys, vertex_id_bias */
10158 unsigned vertex_id_tmp_index
= emit
->vs
.vertex_id_tmp_index
;
10159 emit
->vs
.vertex_id_tmp_index
= INVALID_INDEX
;
10160 emit_instruction_opn(emit
, VGPU10_OPCODE_IADD
, &vertex_id_tmp_dst
,
10161 &vertex_id_sys_src_x
, &vertex_id_bias_index
, NULL
, FALSE
,
10163 emit
->vs
.vertex_id_tmp_index
= vertex_id_tmp_index
;
10167 * Hull Shader must have control point outputs. But tessellation
10168 * control shader can return without writing to control point output.
10169 * In this case, the control point output is assumed to be passthrough
10170 * from the control point input.
10171 * This helper function is to write out a control point output first in case
10172 * the tessellation control shader returns before writing a
10173 * control point output.
10176 emit_tcs_default_control_point_output(struct svga_shader_emitter_v10
*emit
)
10178 assert(emit
->unit
== PIPE_SHADER_TESS_CTRL
);
10179 assert(emit
->tcs
.control_point_phase
);
10180 assert(emit
->tcs
.control_point_input_index
!= INVALID_INDEX
);
10181 assert(emit
->tcs
.control_point_out_index
!= INVALID_INDEX
);
10182 assert(emit
->tcs
.invocation_id_sys_index
!= INVALID_INDEX
);
10184 /* UARL ADDR[INDEX].x INVOCATION.xxxx */
10186 struct tgsi_full_src_register invocation_src
;
10187 struct tgsi_full_dst_register addr_dst
;
10188 struct tgsi_full_dst_register addr_dst_x
;
10191 addr_tmp
= emit
->address_reg_index
[emit
->tcs
.control_point_addr_index
];
10192 addr_dst
= make_dst_temp_reg(addr_tmp
);
10193 addr_dst_x
= writemask_dst(&addr_dst
, TGSI_WRITEMASK_X
);
10195 invocation_src
= make_src_reg(TGSI_FILE_SYSTEM_VALUE
,
10196 emit
->tcs
.invocation_id_sys_index
);
10198 begin_emit_instruction(emit
);
10199 emit_opcode_precise(emit
, VGPU10_OPCODE_MOV
, FALSE
, FALSE
);
10200 emit_dst_register(emit
, &addr_dst_x
);
10201 emit_src_register(emit
, &invocation_src
);
10202 end_emit_instruction(emit
);
10205 /* MOV OUTPUT INPUT[ADDR[INDEX].x][POSITION] */
10207 struct tgsi_full_src_register input_control_point
;
10208 struct tgsi_full_dst_register output_control_point
;
10210 input_control_point
= make_src_reg(TGSI_FILE_INPUT
,
10211 emit
->tcs
.control_point_input_index
);
10212 input_control_point
.Register
.Dimension
= 1;
10213 input_control_point
.Dimension
.Indirect
= 1;
10214 input_control_point
.DimIndirect
.File
= TGSI_FILE_ADDRESS
;
10215 input_control_point
.DimIndirect
.Index
= emit
->tcs
.control_point_addr_index
;
10216 output_control_point
=
10217 make_dst_output_reg(emit
->tcs
.control_point_out_index
);
10219 begin_emit_instruction(emit
);
10220 emit_opcode_precise(emit
, VGPU10_OPCODE_MOV
, FALSE
, FALSE
);
10221 emit_dst_register(emit
, &output_control_point
);
10222 emit_src_register(emit
, &input_control_point
);
10223 end_emit_instruction(emit
);
10227 * This functions constructs temporary tessfactor from VGPU10*_TESSFACTOR
10228 * values in domain shader. SM5 has tessfactors as floating point values where
10229 * as tgsi emit them as vector. This function allows to construct temp
10230 * tessfactor vector similar to TGSI_SEMANTIC_TESSINNER/OUTER filled with
10231 * values from VGPU10*_TESSFACTOR. Use this constructed vector whenever
10232 * TGSI_SEMANTIC_TESSINNER/OUTER is used in shader.
10235 emit_temp_tessfactor_instructions(struct svga_shader_emitter_v10
*emit
)
10237 struct tgsi_full_src_register src
;
10238 struct tgsi_full_dst_register dst
;
10240 if (emit
->tes
.inner
.tgsi_index
!= INVALID_INDEX
) {
10241 dst
= make_dst_temp_reg(emit
->tes
.inner
.temp_index
);
10243 switch (emit
->tes
.prim_mode
) {
10244 case PIPE_PRIM_QUADS
:
10245 src
= make_src_scalar_reg(TGSI_FILE_INPUT
,
10246 emit
->tes
.inner
.in_index
+ 1, TGSI_SWIZZLE_X
);
10247 dst
= writemask_dst(&dst
, TGSI_WRITEMASK_Y
);
10248 emit_instruction_op1(emit
, VGPU10_OPCODE_MOV
, &dst
, &src
);
10250 case PIPE_PRIM_TRIANGLES
:
10251 src
= make_src_scalar_reg(TGSI_FILE_INPUT
,
10252 emit
->tes
.inner
.in_index
, TGSI_SWIZZLE_X
);
10253 dst
= writemask_dst(&dst
, TGSI_WRITEMASK_X
);
10254 emit_instruction_op1(emit
, VGPU10_OPCODE_MOV
, &dst
, &src
);
10256 case PIPE_PRIM_LINES
:
10258 * As per SM5 spec, InsideTessFactor for isolines are unused.
10259 * In fact glsl tessInnerLevel for isolines doesn't mean anything but if
10260 * any application try to read tessInnerLevel in TES when primitive type
10261 * is isolines, then instead of driver throwing segfault for accesing it,
10262 * return atleast vec(1.0f)
10264 src
= make_immediate_reg_float(emit
, 1.0f
);
10265 emit_instruction_op1(emit
, VGPU10_OPCODE_MOV
, &dst
, &src
);
10272 if (emit
->tes
.outer
.tgsi_index
!= INVALID_INDEX
) {
10273 dst
= make_dst_temp_reg(emit
->tes
.outer
.temp_index
);
10275 switch (emit
->tes
.prim_mode
) {
10276 case PIPE_PRIM_QUADS
:
10277 src
= make_src_scalar_reg(TGSI_FILE_INPUT
,
10278 emit
->tes
.outer
.in_index
+ 3, TGSI_SWIZZLE_X
);
10279 dst
= writemask_dst(&dst
, TGSI_WRITEMASK_W
);
10280 emit_instruction_op1(emit
, VGPU10_OPCODE_MOV
, &dst
, &src
);
10282 case PIPE_PRIM_TRIANGLES
:
10283 src
= make_src_scalar_reg(TGSI_FILE_INPUT
,
10284 emit
->tes
.outer
.in_index
+ 2, TGSI_SWIZZLE_X
);
10285 dst
= writemask_dst(&dst
, TGSI_WRITEMASK_Z
);
10286 emit_instruction_op1(emit
, VGPU10_OPCODE_MOV
, &dst
, &src
);
10288 case PIPE_PRIM_LINES
:
10289 src
= make_src_scalar_reg(TGSI_FILE_INPUT
,
10290 emit
->tes
.outer
.in_index
+ 1, TGSI_SWIZZLE_X
);
10291 dst
= writemask_dst(&dst
, TGSI_WRITEMASK_Y
);
10292 emit_instruction_op1(emit
, VGPU10_OPCODE_MOV
, &dst
, &src
);
10294 src
= make_src_scalar_reg(TGSI_FILE_INPUT
,
10295 emit
->tes
.outer
.in_index
, TGSI_SWIZZLE_X
);
10296 dst
= writemask_dst(&dst
, TGSI_WRITEMASK_X
);
10297 emit_instruction_op1(emit
, VGPU10_OPCODE_MOV
, &dst
, &src
);
10308 emit_initialize_temp_instruction(struct svga_shader_emitter_v10
*emit
)
10310 struct tgsi_full_src_register src
;
10311 struct tgsi_full_dst_register dst
;
10312 unsigned vgpu10_temp_index
= remap_temp_index(emit
, TGSI_FILE_TEMPORARY
,
10313 emit
->initialize_temp_index
);
10314 src
= make_immediate_reg_float(emit
, 0.0f
);
10315 dst
= make_dst_temp_reg(vgpu10_temp_index
);
10316 emit_instruction_op1(emit
, VGPU10_OPCODE_MOV
, &dst
, &src
);
10317 emit
->temp_map
[emit
->initialize_temp_index
].initialized
= TRUE
;
10318 emit
->initialize_temp_index
= INVALID_INDEX
;
10323 * Emit any extra/helper declarations/code that we might need between
10324 * the declaration section and code section.
10327 emit_pre_helpers(struct svga_shader_emitter_v10
*emit
)
10330 if (emit
->unit
== PIPE_SHADER_GEOMETRY
)
10331 emit_property_instructions(emit
);
10332 else if (emit
->unit
== PIPE_SHADER_TESS_CTRL
) {
10333 emit_hull_shader_declarations(emit
);
10335 /* Save the position of the first instruction token so that we can
10336 * do a second pass of the instructions for the patch constant phase.
10338 emit
->tcs
.instruction_token_pos
= emit
->cur_tgsi_token
;
10339 emit
->tcs
.fork_phase_add_signature
= FALSE
;
10341 if (!emit_hull_shader_control_point_phase(emit
)) {
10342 emit
->skip_instruction
= TRUE
;
10346 /* Set the current tcs phase to control point phase */
10347 emit
->tcs
.control_point_phase
= TRUE
;
10349 else if (emit
->unit
== PIPE_SHADER_TESS_EVAL
) {
10350 emit_domain_shader_declarations(emit
);
10353 /* Declare inputs */
10354 if (!emit_input_declarations(emit
))
10357 /* Declare outputs */
10358 if (!emit_output_declarations(emit
))
10361 /* Declare temporary registers */
10362 emit_temporaries_declaration(emit
);
10364 /* For PIPE_SHADER_TESS_CTRL, constants, samplers, resources and immediates
10365 * will already be declared in hs_decls (emit_hull_shader_declarations)
10367 if (emit
->unit
!= PIPE_SHADER_TESS_CTRL
) {
10368 /* Declare constant registers */
10369 emit_constant_declaration(emit
);
10371 /* Declare samplers and resources */
10372 emit_sampler_declarations(emit
);
10373 emit_resource_declarations(emit
);
10375 alloc_common_immediates(emit
);
10376 /* Now, emit the constant block containing all the immediates
10377 * declared by shader, as well as the extra ones seen above.
10381 if (emit
->unit
!= PIPE_SHADER_FRAGMENT
) {
10383 * Declare clip distance output registers for ClipVertex or
10384 * user defined planes
10386 emit_clip_distance_declarations(emit
);
10389 if (emit
->unit
== PIPE_SHADER_FRAGMENT
&&
10390 emit
->key
.fs
.alpha_func
!= SVGA3D_CMP_ALWAYS
) {
10391 float alpha
= emit
->key
.fs
.alpha_ref
;
10392 emit
->fs
.alpha_ref_index
=
10393 alloc_immediate_float4(emit
, alpha
, alpha
, alpha
, alpha
);
10396 if (emit
->unit
!= PIPE_SHADER_TESS_CTRL
) {
10398 * For PIPE_SHADER_TESS_CTRL, immediates are already declared in
10401 emit_vgpu10_immediates_block(emit
);
10404 emit_tcs_default_control_point_output(emit
);
10407 if (emit
->unit
== PIPE_SHADER_FRAGMENT
) {
10408 emit_frontface_instructions(emit
);
10409 emit_fragcoord_instructions(emit
);
10410 emit_sample_position_instructions(emit
);
10411 emit_default_layer_instructions(emit
);
10413 else if (emit
->unit
== PIPE_SHADER_VERTEX
) {
10414 emit_vertex_attrib_instructions(emit
);
10416 if (emit
->info
.uses_vertexid
)
10417 emit_vertex_id_nobase_instruction(emit
);
10419 else if (emit
->unit
== PIPE_SHADER_TESS_EVAL
) {
10420 emit_temp_tessfactor_instructions(emit
);
10424 * For geometry shader that writes to viewport index, the prescale
10425 * temporaries will be done at the first vertex emission.
10427 if (emit
->vposition
.need_prescale
&& emit
->vposition
.num_prescale
== 1)
10428 emit_temp_prescale_instructions(emit
);
10435 * The device has no direct support for the pipe_blend_state::alpha_to_one
10436 * option so we implement it here with shader code.
10438 * Note that this is kind of pointless, actually. Here we're clobbering
10439 * the alpha value with 1.0. So if alpha-to-coverage is enabled, we'll wind
10440 * up with 100% coverage. That's almost certainly not what the user wants.
10441 * The work-around is to add extra shader code to compute coverage from alpha
10442 * and write it to the coverage output register (if the user's shader doesn't
10443 * do so already). We'll probably do that in the future.
10446 emit_alpha_to_one_instructions(struct svga_shader_emitter_v10
*emit
,
10447 unsigned fs_color_tmp_index
)
10449 struct tgsi_full_src_register one
= make_immediate_reg_float(emit
, 1.0f
);
10452 /* Note: it's not 100% clear from the spec if we're supposed to clobber
10453 * the alpha for all render targets. But that's what NVIDIA does and
10454 * that's what Piglit tests.
10456 for (i
= 0; i
< emit
->fs
.num_color_outputs
; i
++) {
10457 struct tgsi_full_dst_register color_dst
;
10459 if (fs_color_tmp_index
!= INVALID_INDEX
&& i
== 0) {
10460 /* write to the temp color register */
10461 color_dst
= make_dst_temp_reg(fs_color_tmp_index
);
10464 /* write directly to the color[i] output */
10465 color_dst
= make_dst_output_reg(emit
->fs
.color_out_index
[i
]);
10468 color_dst
= writemask_dst(&color_dst
, TGSI_WRITEMASK_W
);
10470 emit_instruction_op1(emit
, VGPU10_OPCODE_MOV
, &color_dst
, &one
);
10476 * Emit alpha test code. This compares TEMP[fs_color_tmp_index].w
10477 * against the alpha reference value and discards the fragment if the
10478 * comparison fails.
10481 emit_alpha_test_instructions(struct svga_shader_emitter_v10
*emit
,
10482 unsigned fs_color_tmp_index
)
10484 /* compare output color's alpha to alpha ref and kill */
10485 unsigned tmp
= get_temp_index(emit
);
10486 struct tgsi_full_src_register tmp_src
= make_src_temp_reg(tmp
);
10487 struct tgsi_full_src_register tmp_src_x
=
10488 scalar_src(&tmp_src
, TGSI_SWIZZLE_X
);
10489 struct tgsi_full_dst_register tmp_dst
= make_dst_temp_reg(tmp
);
10490 struct tgsi_full_src_register color_src
=
10491 make_src_temp_reg(fs_color_tmp_index
);
10492 struct tgsi_full_src_register color_src_w
=
10493 scalar_src(&color_src
, TGSI_SWIZZLE_W
);
10494 struct tgsi_full_src_register ref_src
=
10495 make_src_immediate_reg(emit
->fs
.alpha_ref_index
);
10496 struct tgsi_full_dst_register color_dst
=
10497 make_dst_output_reg(emit
->fs
.color_out_index
[0]);
10499 assert(emit
->unit
== PIPE_SHADER_FRAGMENT
);
10501 /* dst = src0 'alpha_func' src1 */
10502 emit_comparison(emit
, emit
->key
.fs
.alpha_func
, &tmp_dst
,
10503 &color_src_w
, &ref_src
);
10505 /* DISCARD if dst.x == 0 */
10506 begin_emit_instruction(emit
);
10507 emit_discard_opcode(emit
, FALSE
); /* discard if src0.x is zero */
10508 emit_src_register(emit
, &tmp_src_x
);
10509 end_emit_instruction(emit
);
10511 /* If we don't need to broadcast the color below, emit the final color here.
10513 if (emit
->key
.fs
.write_color0_to_n_cbufs
<= 1) {
10514 /* MOV output.color, tempcolor */
10515 emit_instruction_op1(emit
, VGPU10_OPCODE_MOV
, &color_dst
, &color_src
);
10518 free_temp_indexes(emit
);
10523 * Emit instructions for writing a single color output to multiple
10525 * This is used when the TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS (or
10526 * when key.fs.white_fragments is true).
10527 * property is set and the number of render targets is greater than one.
10528 * \param fs_color_tmp_index index of the temp register that holds the
10529 * color to broadcast.
10532 emit_broadcast_color_instructions(struct svga_shader_emitter_v10
*emit
,
10533 unsigned fs_color_tmp_index
)
10535 const unsigned n
= emit
->key
.fs
.write_color0_to_n_cbufs
;
10537 struct tgsi_full_src_register color_src
;
10539 if (emit
->key
.fs
.white_fragments
) {
10540 /* set all color outputs to white */
10541 color_src
= make_immediate_reg_float(emit
, 1.0f
);
10544 /* set all color outputs to TEMP[fs_color_tmp_index] */
10545 assert(fs_color_tmp_index
!= INVALID_INDEX
);
10546 color_src
= make_src_temp_reg(fs_color_tmp_index
);
10549 assert(emit
->unit
== PIPE_SHADER_FRAGMENT
);
10551 for (i
= 0; i
< n
; i
++) {
10552 unsigned output_reg
= emit
->fs
.color_out_index
[i
];
10553 struct tgsi_full_dst_register color_dst
=
10554 make_dst_output_reg(output_reg
);
10556 /* Fill in this semantic here since we'll use it later in
10557 * emit_dst_register().
10559 emit
->info
.output_semantic_name
[output_reg
] = TGSI_SEMANTIC_COLOR
;
10561 /* MOV output.color[i], tempcolor */
10562 emit_instruction_op1(emit
, VGPU10_OPCODE_MOV
, &color_dst
, &color_src
);
10568 * Emit extra helper code after the original shader code, but before the
10569 * last END/RET instruction.
10570 * For vertex shaders this means emitting the extra code to apply the
10571 * prescale scale/translation.
10574 emit_post_helpers(struct svga_shader_emitter_v10
*emit
)
10576 if (emit
->unit
== PIPE_SHADER_VERTEX
) {
10577 emit_vertex_instructions(emit
);
10579 else if (emit
->unit
== PIPE_SHADER_FRAGMENT
) {
10580 const unsigned fs_color_tmp_index
= emit
->fs
.color_tmp_index
;
10582 assert(!(emit
->key
.fs
.white_fragments
&&
10583 emit
->key
.fs
.write_color0_to_n_cbufs
== 0));
10585 /* We no longer want emit_dst_register() to substitute the
10586 * temporary fragment color register for the real color output.
10588 emit
->fs
.color_tmp_index
= INVALID_INDEX
;
10590 if (emit
->key
.fs
.alpha_to_one
) {
10591 emit_alpha_to_one_instructions(emit
, fs_color_tmp_index
);
10593 if (emit
->key
.fs
.alpha_func
!= SVGA3D_CMP_ALWAYS
) {
10594 emit_alpha_test_instructions(emit
, fs_color_tmp_index
);
10596 if (emit
->key
.fs
.write_color0_to_n_cbufs
> 1 ||
10597 emit
->key
.fs
.white_fragments
) {
10598 emit_broadcast_color_instructions(emit
, fs_color_tmp_index
);
10601 else if (emit
->unit
== PIPE_SHADER_TESS_CTRL
) {
10602 if (!emit
->tcs
.control_point_phase
) {
10603 /* store the tessellation levels in the patch constant phase only */
10604 store_tesslevels(emit
);
10607 emit_clipping_instructions(emit
);
10610 else if (emit
->unit
== PIPE_SHADER_TESS_EVAL
) {
10611 emit_vertex_instructions(emit
);
10619 * Translate the TGSI tokens into VGPU10 tokens.
10622 emit_vgpu10_instructions(struct svga_shader_emitter_v10
*emit
,
10623 const struct tgsi_token
*tokens
)
10625 struct tgsi_parse_context parse
;
10626 boolean ret
= TRUE
;
10627 boolean pre_helpers_emitted
= FALSE
;
10628 unsigned inst_number
= 0;
10630 tgsi_parse_init(&parse
, tokens
);
10632 while (!tgsi_parse_end_of_tokens(&parse
)) {
10634 /* Save the current tgsi token starting position */
10635 emit
->cur_tgsi_token
= parse
.Position
;
10637 tgsi_parse_token(&parse
);
10639 switch (parse
.FullToken
.Token
.Type
) {
10640 case TGSI_TOKEN_TYPE_IMMEDIATE
:
10641 ret
= emit_vgpu10_immediate(emit
, &parse
.FullToken
.FullImmediate
);
10646 case TGSI_TOKEN_TYPE_DECLARATION
:
10647 ret
= emit_vgpu10_declaration(emit
, &parse
.FullToken
.FullDeclaration
);
10652 case TGSI_TOKEN_TYPE_INSTRUCTION
:
10653 if (!pre_helpers_emitted
) {
10654 ret
= emit_pre_helpers(emit
);
10657 pre_helpers_emitted
= TRUE
;
10659 ret
= emit_vgpu10_instruction(emit
, inst_number
++,
10660 &parse
.FullToken
.FullInstruction
);
10662 /* Usually this applies to TCS only. If shader is reading control
10663 * point outputs in control point phase, we should reemit all
10664 * instructions which are writting into control point output in
10665 * control phase to store results into temporaries.
10667 if (emit
->reemit_instruction
) {
10668 assert(emit
->unit
== PIPE_SHADER_TESS_CTRL
);
10669 ret
= emit_vgpu10_instruction(emit
, inst_number
,
10670 &parse
.FullToken
.FullInstruction
);
10672 else if (emit
->initialize_temp_index
!= INVALID_INDEX
) {
10673 emit_initialize_temp_instruction(emit
);
10674 emit
->initialize_temp_index
= INVALID_INDEX
;
10675 ret
= emit_vgpu10_instruction(emit
, inst_number
- 1,
10676 &parse
.FullToken
.FullInstruction
);
10683 case TGSI_TOKEN_TYPE_PROPERTY
:
10684 ret
= emit_vgpu10_property(emit
, &parse
.FullToken
.FullProperty
);
10694 if (emit
->unit
== PIPE_SHADER_TESS_CTRL
) {
10695 ret
= emit_hull_shader_patch_constant_phase(emit
, &parse
);
10699 tgsi_parse_free(&parse
);
10705 * Emit the first VGPU10 shader tokens.
10708 emit_vgpu10_header(struct svga_shader_emitter_v10
*emit
)
10710 VGPU10ProgramToken ptoken
;
10712 /* First token: VGPU10ProgramToken (version info, program type (VS,GS,PS)) */
10713 ptoken
.value
= 0; /* init whole token to zero */
10714 ptoken
.majorVersion
= emit
->version
/ 10;
10715 ptoken
.minorVersion
= emit
->version
% 10;
10716 ptoken
.programType
= translate_shader_type(emit
->unit
);
10717 if (!emit_dword(emit
, ptoken
.value
))
10720 /* Second token: total length of shader, in tokens. We can't fill this
10721 * in until we're all done. Emit zero for now.
10723 if (!emit_dword(emit
, 0))
10726 if (emit
->version
>= 50) {
10727 VGPU10OpcodeToken0 token
;
10729 if (emit
->unit
== PIPE_SHADER_TESS_CTRL
) {
10730 /* For hull shader, we need to start the declarations phase first before
10731 * emitting any declarations including the global flags.
10734 token
.opcodeType
= VGPU10_OPCODE_HS_DECLS
;
10735 begin_emit_instruction(emit
);
10736 emit_dword(emit
, token
.value
);
10737 end_emit_instruction(emit
);
10740 /* Emit global flags */
10741 token
.value
= 0; /* init whole token to zero */
10742 token
.opcodeType
= VGPU10_OPCODE_DCL_GLOBAL_FLAGS
;
10743 token
.enableDoublePrecisionFloatOps
= 1; /* set bit */
10744 token
.instructionLength
= 1;
10745 if (!emit_dword(emit
, token
.value
))
10749 if (emit
->version
>= 40) {
10750 VGPU10OpcodeToken0 token
;
10752 /* Reserved for global flag such as refactoringAllowed.
10753 * If the shader does not use the precise qualifier, we will set the
10754 * refactoringAllowed global flag; otherwise, we will leave the reserved
10757 emit
->reserved_token
= (emit
->ptr
- emit
->buf
) / sizeof(VGPU10OpcodeToken0
);
10759 token
.opcodeType
= VGPU10_OPCODE_NOP
;
10760 token
.instructionLength
= 1;
10761 if (!emit_dword(emit
, token
.value
))
10770 emit_vgpu10_tail(struct svga_shader_emitter_v10
*emit
)
10772 VGPU10ProgramToken
*tokens
;
10774 /* Replace the second token with total shader length */
10775 tokens
= (VGPU10ProgramToken
*) emit
->buf
;
10776 tokens
[1].value
= emit_get_num_tokens(emit
);
10778 if (emit
->version
>= 40 && !emit
->uses_precise_qualifier
) {
10779 /* Replace the reserved token with the RefactoringAllowed global flag */
10780 VGPU10OpcodeToken0
*ptoken
;
10782 ptoken
= (VGPU10OpcodeToken0
*)&tokens
[emit
->reserved_token
];
10783 assert(ptoken
->opcodeType
== VGPU10_OPCODE_NOP
);
10784 ptoken
->opcodeType
= VGPU10_OPCODE_DCL_GLOBAL_FLAGS
;
10785 ptoken
->refactoringAllowed
= 1;
/**
 * Modify the FS to read the BCOLORs and use the FACE register
 * to choose between the front/back colors.
 *
 * \param tokens  the fragment shader tokens to transform
 * \return the token string produced by tgsi_add_two_side()
 */
static const struct tgsi_token *
transform_fs_twoside(const struct tgsi_token *tokens)
{
   const struct tgsi_token *two_sided;

   /* flip to 1 to dump the shader before the transformation */
   if (0) {
      debug_printf("Before tgsi_add_two_side ------------------\n");
      tgsi_dump(tokens,0);
   }

   two_sided = tgsi_add_two_side(tokens);

   /* flip to 1 to dump the shader after the transformation */
   if (0) {
      debug_printf("After tgsi_add_two_side ------------------\n");
      tgsi_dump(two_sided, 0);
   }

   return two_sided;
}
10813 * Modify the FS to do polygon stipple.
10815 static const struct tgsi_token
*
10816 transform_fs_pstipple(struct svga_shader_emitter_v10
*emit
,
10817 const struct tgsi_token
*tokens
)
10819 const struct tgsi_token
*new_tokens
;
10823 debug_printf("Before pstipple ------------------\n");
10824 tgsi_dump(tokens
,0);
10827 new_tokens
= util_pstipple_create_fragment_shader(tokens
, &unit
, 0,
10830 emit
->fs
.pstipple_sampler_unit
= unit
;
10832 /* Setup texture state for stipple */
10833 emit
->sampler_target
[unit
] = TGSI_TEXTURE_2D
;
10834 emit
->key
.tex
[unit
].swizzle_r
= TGSI_SWIZZLE_X
;
10835 emit
->key
.tex
[unit
].swizzle_g
= TGSI_SWIZZLE_Y
;
10836 emit
->key
.tex
[unit
].swizzle_b
= TGSI_SWIZZLE_Z
;
10837 emit
->key
.tex
[unit
].swizzle_a
= TGSI_SWIZZLE_W
;
10840 debug_printf("After pstipple ------------------\n");
10841 tgsi_dump(new_tokens
, 0);
/**
 * Modify the FS to support anti-aliasing point.
 *
 * \param tokens          the fragment shader tokens to transform
 * \param aa_coord_index  index passed through to tgsi_add_aa_point()
 * \return the token string produced by tgsi_add_aa_point()
 */
static const struct tgsi_token *
transform_fs_aapoint(const struct tgsi_token *tokens,
                     int aa_coord_index)
{
   const struct tgsi_token *aa_tokens;

   /* flip to 1 to dump the shader before the transformation */
   if (0) {
      debug_printf("Before tgsi_add_aa_point ------------------\n");
      tgsi_dump(tokens,0);
   }

   aa_tokens = tgsi_add_aa_point(tokens, aa_coord_index);

   /* flip to 1 to dump the shader after the transformation */
   if (0) {
      debug_printf("After tgsi_add_aa_point ------------------\n");
      tgsi_dump(aa_tokens, 0);
   }

   return aa_tokens;
}
10868 * A helper function to determine the shader in the previous stage and
10869 * then call the linker function to determine the input mapping for this
10870 * shader to match the output indices from the shader in the previous stage.
10873 compute_input_mapping(struct svga_context
*svga
,
10874 struct svga_shader_emitter_v10
*emit
,
10875 enum pipe_shader_type unit
)
10877 struct svga_shader
*prevShader
= NULL
; /* shader in the previous stage */
10879 if (unit
== PIPE_SHADER_FRAGMENT
) {
10880 prevShader
= svga
->curr
.gs
?
10881 &svga
->curr
.gs
->base
: (svga
->curr
.tes
?
10882 &svga
->curr
.tes
->base
: &svga
->curr
.vs
->base
);
10883 } else if (unit
== PIPE_SHADER_GEOMETRY
) {
10884 prevShader
= svga
->curr
.tes
? &svga
->curr
.tes
->base
: &svga
->curr
.vs
->base
;
10885 } else if (unit
== PIPE_SHADER_TESS_EVAL
) {
10886 assert(svga
->curr
.tcs
);
10887 prevShader
= &svga
->curr
.tcs
->base
;
10888 } else if (unit
== PIPE_SHADER_TESS_CTRL
) {
10889 assert(svga
->curr
.vs
);
10890 prevShader
= &svga
->curr
.vs
->base
;
10893 if (prevShader
!= NULL
) {
10894 svga_link_shaders(&prevShader
->info
, &emit
->info
, &emit
->linkage
);
10895 emit
->prevShaderInfo
= &prevShader
->info
;
10899 * Since vertex shader does not need to go through the linker to
10900 * establish the input map, we need to make sure the highest index
10901 * of input registers is set properly here.
10903 emit
->linkage
.input_map_max
= MAX2((int)emit
->linkage
.input_map_max
,
10904 emit
->info
.file_max
[TGSI_FILE_INPUT
]);
10910 * Copies the shader signature info to the shader variant
10913 copy_shader_signature(struct svga_shader_signature
*sgn
,
10914 struct svga_shader_variant
*variant
)
10916 SVGA3dDXShaderSignatureHeader
*header
= &sgn
->header
;
10918 /* Calculate the signature length */
10919 variant
->signatureLen
= sizeof(SVGA3dDXShaderSignatureHeader
) +
10920 (header
->numInputSignatures
+
10921 header
->numOutputSignatures
+
10922 header
->numPatchConstantSignatures
) *
10923 sizeof(SVGA3dDXShaderSignatureEntry
);
10925 /* Allocate buffer for the signature info */
10926 variant
->signature
=
10927 (SVGA3dDXShaderSignatureHeader
*)CALLOC(1, variant
->signatureLen
);
10929 char *sgnBuf
= (char *)variant
->signature
;
10932 /* Copy the signature info to the shader variant structure */
10933 memcpy(sgnBuf
, &sgn
->header
, sizeof(SVGA3dDXShaderSignatureHeader
));
10934 sgnBuf
+= sizeof(SVGA3dDXShaderSignatureHeader
);
10936 if (header
->numInputSignatures
) {
10938 header
->numInputSignatures
* sizeof(SVGA3dDXShaderSignatureEntry
);
10939 memcpy(sgnBuf
, &sgn
->inputs
[0], sgnLen
);
10943 if (header
->numOutputSignatures
) {
10945 header
->numOutputSignatures
* sizeof(SVGA3dDXShaderSignatureEntry
);
10946 memcpy(sgnBuf
, &sgn
->outputs
[0], sgnLen
);
10950 if (header
->numPatchConstantSignatures
) {
10952 header
->numPatchConstantSignatures
* sizeof(SVGA3dDXShaderSignatureEntry
);
10953 memcpy(sgnBuf
, &sgn
->patchConstants
[0], sgnLen
);
10959 * This is the main entrypoint for the TGSI -> VGPU10 translator.
10961 struct svga_shader_variant
*
10962 svga_tgsi_vgpu10_translate(struct svga_context
*svga
,
10963 const struct svga_shader
*shader
,
10964 const struct svga_compile_key
*key
,
10965 enum pipe_shader_type unit
)
10967 struct svga_shader_variant
*variant
= NULL
;
10968 struct svga_shader_emitter_v10
*emit
;
10969 const struct tgsi_token
*tokens
= shader
->tokens
;
10971 (void) make_immediate_reg_double
; /* unused at this time */
10973 assert(unit
== PIPE_SHADER_VERTEX
||
10974 unit
== PIPE_SHADER_GEOMETRY
||
10975 unit
== PIPE_SHADER_FRAGMENT
||
10976 unit
== PIPE_SHADER_TESS_CTRL
||
10977 unit
== PIPE_SHADER_TESS_EVAL
||
10978 unit
== PIPE_SHADER_COMPUTE
);
10980 /* These two flags cannot be used together */
10981 assert(key
->vs
.need_prescale
+ key
->vs
.undo_viewport
<= 1);
10983 SVGA_STATS_TIME_PUSH(svga_sws(svga
), SVGA_STATS_TIME_TGSIVGPU10TRANSLATE
);
10985 * Setup the code emitter
10987 emit
= alloc_emitter();
10992 if (svga_have_sm5(svga
)) {
10993 emit
->version
= 50;
10994 } else if (svga_have_sm4_1(svga
)) {
10995 emit
->version
= 41;
10997 emit
->version
= 40;
11000 emit
->signature
.header
.headerVersion
= SVGADX_SIGNATURE_HEADER_VERSION_0
;
11004 emit
->vposition
.need_prescale
= (emit
->key
.vs
.need_prescale
||
11005 emit
->key
.gs
.need_prescale
||
11006 emit
->key
.tes
.need_prescale
);
11008 /* Determine how many prescale factors in the constant buffer */
11009 emit
->vposition
.num_prescale
= 1;
11010 if (emit
->vposition
.need_prescale
&& emit
->key
.gs
.writes_viewport_index
) {
11011 assert(emit
->unit
== PIPE_SHADER_GEOMETRY
);
11012 emit
->vposition
.num_prescale
= emit
->key
.gs
.num_prescale
;
11015 emit
->vposition
.tmp_index
= INVALID_INDEX
;
11016 emit
->vposition
.so_index
= INVALID_INDEX
;
11017 emit
->vposition
.out_index
= INVALID_INDEX
;
11019 emit
->vs
.vertex_id_sys_index
= INVALID_INDEX
;
11020 emit
->vs
.vertex_id_tmp_index
= INVALID_INDEX
;
11021 emit
->vs
.vertex_id_bias_index
= INVALID_INDEX
;
11023 emit
->fs
.color_tmp_index
= INVALID_INDEX
;
11024 emit
->fs
.face_input_index
= INVALID_INDEX
;
11025 emit
->fs
.fragcoord_input_index
= INVALID_INDEX
;
11026 emit
->fs
.sample_id_sys_index
= INVALID_INDEX
;
11027 emit
->fs
.sample_pos_sys_index
= INVALID_INDEX
;
11028 emit
->fs
.sample_mask_in_sys_index
= INVALID_INDEX
;
11029 emit
->fs
.layer_input_index
= INVALID_INDEX
;
11030 emit
->fs
.layer_imm_index
= INVALID_INDEX
;
11032 emit
->gs
.prim_id_index
= INVALID_INDEX
;
11033 emit
->gs
.invocation_id_sys_index
= INVALID_INDEX
;
11034 emit
->gs
.viewport_index_out_index
= INVALID_INDEX
;
11035 emit
->gs
.viewport_index_tmp_index
= INVALID_INDEX
;
11037 emit
->tcs
.vertices_per_patch_index
= INVALID_INDEX
;
11038 emit
->tcs
.invocation_id_sys_index
= INVALID_INDEX
;
11039 emit
->tcs
.control_point_input_index
= INVALID_INDEX
;
11040 emit
->tcs
.control_point_addr_index
= INVALID_INDEX
;
11041 emit
->tcs
.control_point_out_index
= INVALID_INDEX
;
11042 emit
->tcs
.control_point_tmp_index
= INVALID_INDEX
;
11043 emit
->tcs
.control_point_out_count
= 0;
11044 emit
->tcs
.inner
.out_index
= INVALID_INDEX
;
11045 emit
->tcs
.inner
.out_index
= INVALID_INDEX
;
11046 emit
->tcs
.inner
.temp_index
= INVALID_INDEX
;
11047 emit
->tcs
.inner
.tgsi_index
= INVALID_INDEX
;
11048 emit
->tcs
.outer
.out_index
= INVALID_INDEX
;
11049 emit
->tcs
.outer
.temp_index
= INVALID_INDEX
;
11050 emit
->tcs
.outer
.tgsi_index
= INVALID_INDEX
;
11051 emit
->tcs
.patch_generic_out_count
= 0;
11052 emit
->tcs
.patch_generic_out_index
= INVALID_INDEX
;
11053 emit
->tcs
.patch_generic_tmp_index
= INVALID_INDEX
;
11054 emit
->tcs
.prim_id_index
= INVALID_INDEX
;
11056 emit
->tes
.tesscoord_sys_index
= INVALID_INDEX
;
11057 emit
->tes
.inner
.in_index
= INVALID_INDEX
;
11058 emit
->tes
.inner
.temp_index
= INVALID_INDEX
;
11059 emit
->tes
.inner
.tgsi_index
= INVALID_INDEX
;
11060 emit
->tes
.outer
.in_index
= INVALID_INDEX
;
11061 emit
->tes
.outer
.temp_index
= INVALID_INDEX
;
11062 emit
->tes
.outer
.tgsi_index
= INVALID_INDEX
;
11063 emit
->tes
.prim_id_index
= INVALID_INDEX
;
11065 emit
->clip_dist_out_index
= INVALID_INDEX
;
11066 emit
->clip_dist_tmp_index
= INVALID_INDEX
;
11067 emit
->clip_dist_so_index
= INVALID_INDEX
;
11068 emit
->clip_vertex_out_index
= INVALID_INDEX
;
11069 emit
->clip_vertex_tmp_index
= INVALID_INDEX
;
11070 emit
->svga_debug_callback
= svga
->debug
.callback
;
11072 emit
->index_range
.start_index
= INVALID_INDEX
;
11073 emit
->index_range
.count
= 0;
11074 emit
->index_range
.required
= FALSE
;
11075 emit
->index_range
.operandType
= VGPU10_NUM_OPERANDS
;
11076 emit
->index_range
.dim
= 0;
11077 emit
->index_range
.size
= 0;
11079 emit
->current_loop_depth
= 0;
11081 emit
->initialize_temp_index
= INVALID_INDEX
;
11083 if (emit
->key
.fs
.alpha_func
== SVGA3D_CMP_INVALID
) {
11084 emit
->key
.fs
.alpha_func
= SVGA3D_CMP_ALWAYS
;
11087 if (unit
== PIPE_SHADER_FRAGMENT
) {
11088 if (key
->fs
.light_twoside
) {
11089 tokens
= transform_fs_twoside(tokens
);
11091 if (key
->fs
.pstipple
) {
11092 const struct tgsi_token
*new_tokens
=
11093 transform_fs_pstipple(emit
, tokens
);
11094 if (tokens
!= shader
->tokens
) {
11095 /* free the two-sided shader tokens */
11096 tgsi_free_tokens(tokens
);
11098 tokens
= new_tokens
;
11100 if (key
->fs
.aa_point
) {
11101 tokens
= transform_fs_aapoint(tokens
, key
->fs
.aa_point_coord_index
);
11105 if (SVGA_DEBUG
& DEBUG_TGSI
) {
11106 debug_printf("#####################################\n");
11107 debug_printf("### TGSI Shader %u\n", shader
->id
);
11108 tgsi_dump(tokens
, 0);
11112 * Rescan the header if the token string is different from the one
11113 * included in the shader; otherwise, the header info is already up-to-date
11115 if (tokens
!= shader
->tokens
) {
11116 tgsi_scan_shader(tokens
, &emit
->info
);
11118 emit
->info
= shader
->info
;
11121 emit
->num_outputs
= emit
->info
.num_outputs
;
11124 * Compute input mapping to match the outputs from shader
11125 * in the previous stage
11127 compute_input_mapping(svga
, emit
, unit
);
11129 determine_clipping_mode(emit
);
11131 if (unit
== PIPE_SHADER_GEOMETRY
|| unit
== PIPE_SHADER_VERTEX
||
11132 unit
== PIPE_SHADER_TESS_CTRL
|| unit
== PIPE_SHADER_TESS_EVAL
) {
11133 if (shader
->stream_output
!= NULL
|| emit
->clip_mode
== CLIP_DISTANCE
) {
11134 /* if there is stream output declarations associated
11135 * with this shader or the shader writes to ClipDistance
11136 * then reserve extra registers for the non-adjusted vertex position
11137 * and the ClipDistance shadow copy.
11139 emit
->vposition
.so_index
= emit
->num_outputs
++;
11141 if (emit
->clip_mode
== CLIP_DISTANCE
) {
11142 emit
->clip_dist_so_index
= emit
->num_outputs
++;
11143 if (emit
->info
.num_written_clipdistance
> 4)
11144 emit
->num_outputs
++;
11150 * Do actual shader translation.
11152 if (!emit_vgpu10_header(emit
)) {
11153 debug_printf("svga: emit VGPU10 header failed\n");
11157 if (!emit_vgpu10_instructions(emit
, tokens
)) {
11158 debug_printf("svga: emit VGPU10 instructions failed\n");
11162 if (!emit_vgpu10_tail(emit
)) {
11163 debug_printf("svga: emit VGPU10 tail failed\n");
11167 if (emit
->register_overflow
) {
11172 * Create, initialize the 'variant' object.
11174 variant
= svga_new_shader_variant(svga
, unit
);
11178 variant
->shader
= shader
;
11179 variant
->nr_tokens
= emit_get_num_tokens(emit
);
11180 variant
->tokens
= (const unsigned *)emit
->buf
;
11182 /* Copy shader signature info to the shader variant */
11183 if (svga_have_sm5(svga
)) {
11184 copy_shader_signature(&emit
->signature
, variant
);
11187 emit
->buf
= NULL
; /* buffer is no longer owned by emitter context */
11188 memcpy(&variant
->key
, key
, sizeof(*key
));
11189 variant
->id
= UTIL_BITMASK_INVALID_INDEX
;
11191 /* The extra constant starting offset starts with the number of
11192 * shader constants declared in the shader.
11194 variant
->extra_const_start
= emit
->num_shader_consts
[0];
11195 if (key
->gs
.wide_point
) {
11197 * The extra constant added in the transformed shader
11198 * for inverse viewport scale is to be supplied by the driver.
11199 * So the extra constant starting offset needs to be reduced by 1.
11201 assert(variant
->extra_const_start
> 0);
11202 variant
->extra_const_start
--;
11205 if (unit
== PIPE_SHADER_FRAGMENT
) {
11206 struct svga_fs_variant
*fs_variant
= svga_fs_variant(variant
);
11208 fs_variant
->pstipple_sampler_unit
= emit
->fs
.pstipple_sampler_unit
;
11210 /* If there was exactly one write to a fragment shader output register
11211 * and it came from a constant buffer, we know all fragments will have
11212 * the same color (except for blending).
11214 fs_variant
->constant_color_output
=
11215 emit
->constant_color_output
&& emit
->num_output_writes
== 1;
11217 /** keep track in the variant if flat interpolation is used
11218 * for any of the varyings.
11220 fs_variant
->uses_flat_interp
= emit
->uses_flat_interp
;
11222 fs_variant
->fs_shadow_compare_units
= emit
->fs
.shadow_compare_units
;
11224 else if (unit
== PIPE_SHADER_TESS_EVAL
) {
11225 struct svga_tes_variant
*tes_variant
= svga_tes_variant(variant
);
11227 /* Keep track in the tes variant some of the layout parameters.
11228 * These parameters will be referenced by the tcs to emit
11229 * the necessary declarations for the hull shader.
11231 tes_variant
->prim_mode
= emit
->tes
.prim_mode
;
11232 tes_variant
->spacing
= emit
->tes
.spacing
;
11233 tes_variant
->vertices_order_cw
= emit
->tes
.vertices_order_cw
;
11234 tes_variant
->point_mode
= emit
->tes
.point_mode
;
11238 if (tokens
!= shader
->tokens
) {
11239 tgsi_free_tokens(tokens
);
11243 free_emitter(emit
);
11246 SVGA_STATS_TIME_POP(svga_sws(svga
));