/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
#include <llvm/Config/llvm-config.h>

#include "util/u_memory.h"
#include "util/u_string.h"
#include "tgsi/tgsi_build.h"
#include "tgsi/tgsi_strings.h"
#include "tgsi/tgsi_util.h"
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_from_mesa.h"

#include "ac_binary.h"
#include "ac_exp_param.h"
#include "ac_shader_util.h"
#include "ac_llvm_util.h"
#include "si_shader_internal.h"

#include "compiler/nir/nir.h"
#include "compiler/nir/nir_serialize.h"
static const char scratch_rsrc_dword0_symbol[] =
	"SCRATCH_RSRC_DWORD0";

static const char scratch_rsrc_dword1_symbol[] =
	"SCRATCH_RSRC_DWORD1";
static void si_init_shader_ctx(struct si_shader_context *ctx,
			       struct si_screen *sscreen,
			       struct ac_llvm_compiler *compiler);

static void si_llvm_emit_barrier(const struct lp_build_tgsi_action *action,
				 struct lp_build_tgsi_context *bld_base,
				 struct lp_build_emit_data *emit_data);

static void si_dump_shader_key(const struct si_shader *shader, FILE *f);

static void si_build_vs_prolog_function(struct si_shader_context *ctx,
					union si_shader_part_key *key);
static void si_build_tcs_epilog_function(struct si_shader_context *ctx,
					 union si_shader_part_key *key);
static void si_build_ps_prolog_function(struct si_shader_context *ctx,
					union si_shader_part_key *key);
static void si_build_ps_epilog_function(struct si_shader_context *ctx,
					union si_shader_part_key *key);
static void si_fix_resource_usage(struct si_screen *sscreen,
				  struct si_shader *shader);

/* Ideally pass the sample mask input to the PS epilog as v14, which
 * is its usual location, so that the shader doesn't have to add v_mov.
 */
#define PS_EPILOG_SAMPLEMASK_MIN_LOC 14
static bool llvm_type_is_64bit(struct si_shader_context *ctx,
			       LLVMTypeRef type)
{
	if (type == ctx->ac.i64 || type == ctx->ac.f64)
		return true;

	return false;
}
/** Whether the shader runs as a combination of multiple API shaders */
static bool is_multi_part_shader(struct si_shader_context *ctx)
{
	if (ctx->screen->info.chip_class <= GFX8)
		return false;

	return ctx->shader->key.as_ls ||
	       ctx->shader->key.as_es ||
	       ctx->type == PIPE_SHADER_TESS_CTRL ||
	       ctx->type == PIPE_SHADER_GEOMETRY;
}
/** Whether the shader runs on a merged HW stage (LSHS or ESGS) */
static bool is_merged_shader(struct si_shader_context *ctx)
{
	return ctx->shader->key.as_ngg || is_multi_part_shader(ctx);
}
/**
 * Returns a unique index for a per-patch semantic name and index. The index
 * must be less than 32, so that a 32-bit bitmask of used inputs or outputs
 * can be used.
 */
unsigned si_shader_io_get_unique_index_patch(unsigned semantic_name, unsigned index)
{
	switch (semantic_name) {
	case TGSI_SEMANTIC_TESSOUTER:
		return 0;
	case TGSI_SEMANTIC_TESSINNER:
		return 1;
	case TGSI_SEMANTIC_PATCH:
		return 2 + index;
	default:
		assert(!"invalid semantic name");
		return 0;
	}
}
/**
 * Returns a unique index for a semantic name and index. The index must be
 * less than 64, so that a 64-bit bitmask of used inputs or outputs can be
 * used.
 */
unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index,
				       unsigned is_varying)
{
	switch (semantic_name) {
	case TGSI_SEMANTIC_POSITION:
		return 0;
	case TGSI_SEMANTIC_GENERIC:
		/* Since some shader stages use the highest used IO index
		 * to determine the size to allocate for inputs/outputs
		 * (in LDS, tess and GS rings), GENERIC should be placed right
		 * after POSITION to make that size as small as possible.
		 */
		if (index < SI_MAX_IO_GENERIC)
			return 1 + index;

		assert(!"invalid generic index");
		return 0;
	case TGSI_SEMANTIC_FOG:
		return SI_MAX_IO_GENERIC + 1;
	case TGSI_SEMANTIC_COLOR:
		return SI_MAX_IO_GENERIC + 2 + index;
	case TGSI_SEMANTIC_BCOLOR:
		/* If it's a varying, COLOR and BCOLOR alias. */
		if (is_varying)
			return SI_MAX_IO_GENERIC + 2 + index;
		else
			return SI_MAX_IO_GENERIC + 4 + index;
	case TGSI_SEMANTIC_TEXCOORD:
		return SI_MAX_IO_GENERIC + 6 + index;

	/* These are rarely used between LS and HS or ES and GS. */
	case TGSI_SEMANTIC_CLIPDIST:
		return SI_MAX_IO_GENERIC + 6 + 8 + index;
	case TGSI_SEMANTIC_CLIPVERTEX:
		return SI_MAX_IO_GENERIC + 6 + 8 + 2;
	case TGSI_SEMANTIC_PSIZE:
		return SI_MAX_IO_GENERIC + 6 + 8 + 3;

	/* These can't be written by LS, HS, and ES. */
	case TGSI_SEMANTIC_LAYER:
		return SI_MAX_IO_GENERIC + 6 + 8 + 4;
	case TGSI_SEMANTIC_VIEWPORT_INDEX:
		return SI_MAX_IO_GENERIC + 6 + 8 + 5;
	case TGSI_SEMANTIC_PRIMID:
		STATIC_ASSERT(SI_MAX_IO_GENERIC + 6 + 8 + 6 <= 63);
		return SI_MAX_IO_GENERIC + 6 + 8 + 6;
	default:
		fprintf(stderr, "invalid semantic name = %u\n", semantic_name);
		assert(!"invalid semantic name");
		return 0;
	}
}
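
/* Illustrative only (not part of the mapping itself): a few slots that follow
 * directly from the cases above, e.g. FOG -> SI_MAX_IO_GENERIC + 1,
 * COLOR[1] -> SI_MAX_IO_GENERIC + 3, CLIPVERTEX -> SI_MAX_IO_GENERIC + 16.
 * The STATIC_ASSERT on the PRIMID case is what guarantees the largest slot
 * still fits in a 64-bit "outputs written" bitmask.
 */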
/**
 * Get the value of a shader input parameter and extract a bitfield.
 */
static LLVMValueRef unpack_llvm_param(struct si_shader_context *ctx,
				      LLVMValueRef value, unsigned rshift,
				      unsigned bitwidth)
{
	if (LLVMGetTypeKind(LLVMTypeOf(value)) == LLVMFloatTypeKind)
		value = ac_to_integer(&ctx->ac, value);

	if (rshift)
		value = LLVMBuildLShr(ctx->ac.builder, value,
				      LLVMConstInt(ctx->i32, rshift, 0), "");

	if (rshift + bitwidth < 32) {
		unsigned mask = (1 << bitwidth) - 1;
		value = LLVMBuildAnd(ctx->ac.builder, value,
				     LLVMConstInt(ctx->i32, mask, 0), "");
	}

	return value;
}
LLVMValueRef si_unpack_param(struct si_shader_context *ctx,
			     struct ac_arg param, unsigned rshift,
			     unsigned bitwidth)
{
	LLVMValueRef value = ac_get_arg(&ctx->ac, param);

	return unpack_llvm_param(ctx, value, rshift, bitwidth);
}
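
/* A minimal sketch of what si_unpack_param() emits, for illustration:
 * reading bits [rshift, rshift + bitwidth) of a 32-bit argument is simply
 *
 *   (value >> rshift) & ((1u << bitwidth) - 1)
 *
 * e.g. get_rel_patch_id() below extracts tcs_rel_ids[7:0] and
 * get_tcs_in_patch_stride() extracts vs_state_bits[20:8].
 */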
static LLVMValueRef get_rel_patch_id(struct si_shader_context *ctx)
{
	switch (ctx->type) {
	case PIPE_SHADER_TESS_CTRL:
		return si_unpack_param(ctx, ctx->args.tcs_rel_ids, 0, 8);

	case PIPE_SHADER_TESS_EVAL:
		return ac_get_arg(&ctx->ac, ctx->tes_rel_patch_id);

	default:
		assert(0);
		return NULL;
	}
}
/* Tessellation shaders pass outputs to the next shader using LDS.
 *
 * LS outputs = TCS inputs
 * TCS outputs = TES inputs
 *
 * The LDS layout is:
 * - TCS inputs for patch 0
 * - TCS inputs for patch 1
 * - TCS inputs for patch 2		= get_tcs_in_current_patch_offset (if RelPatchID==2)
 * - ...
 * - TCS outputs for patch 0		= get_tcs_out_patch0_offset
 * - Per-patch TCS outputs for patch 0	= get_tcs_out_patch0_patch_data_offset
 * - TCS outputs for patch 1
 * - Per-patch TCS outputs for patch 1
 * - TCS outputs for patch 2		= get_tcs_out_current_patch_offset (if RelPatchID==2)
 * - Per-patch TCS outputs for patch 2	= get_tcs_out_current_patch_data_offset (if RelPatchID==2)
 * - ...
 *
 * All three shaders VS(LS), TCS, TES share the same LDS space.
 */
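
/* Worked example (illustrative only): with an input patch stride of S_in
 * dwords and an output patch stride of S_out dwords, relative patch id R
 * addresses its data at
 *
 *   inputs:             R * S_in
 *   per-vertex outputs: patch0_offset + R * S_out
 *   per-patch outputs:  patch0_patch_data_offset + R * S_out
 *
 * which is exactly what the helpers below compute in LLVM IR.
 */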
static LLVMValueRef
get_tcs_in_patch_stride(struct si_shader_context *ctx)
{
	return si_unpack_param(ctx, ctx->vs_state_bits, 8, 13);
}

static unsigned get_tcs_out_vertex_dw_stride_constant(struct si_shader_context *ctx)
{
	assert(ctx->type == PIPE_SHADER_TESS_CTRL);

	if (ctx->shader->key.mono.u.ff_tcs_inputs_to_copy)
		return util_last_bit64(ctx->shader->key.mono.u.ff_tcs_inputs_to_copy) * 4;

	return util_last_bit64(ctx->shader->selector->outputs_written) * 4;
}

static LLVMValueRef get_tcs_out_vertex_dw_stride(struct si_shader_context *ctx)
{
	unsigned stride = get_tcs_out_vertex_dw_stride_constant(ctx);

	return LLVMConstInt(ctx->i32, stride, 0);
}

static LLVMValueRef get_tcs_out_patch_stride(struct si_shader_context *ctx)
{
	if (ctx->shader->key.mono.u.ff_tcs_inputs_to_copy)
		return si_unpack_param(ctx, ctx->tcs_out_lds_layout, 0, 13);

	const struct tgsi_shader_info *info = &ctx->shader->selector->info;
	unsigned tcs_out_vertices = info->properties[TGSI_PROPERTY_TCS_VERTICES_OUT];
	unsigned vertex_dw_stride = get_tcs_out_vertex_dw_stride_constant(ctx);
	unsigned num_patch_outputs = util_last_bit64(ctx->shader->selector->patch_outputs_written);
	unsigned patch_dw_stride = tcs_out_vertices * vertex_dw_stride +
				   num_patch_outputs * 4;
	return LLVMConstInt(ctx->i32, patch_dw_stride, 0);
}
static LLVMValueRef
get_tcs_out_patch0_offset(struct si_shader_context *ctx)
{
	return LLVMBuildMul(ctx->ac.builder,
			    si_unpack_param(ctx, ctx->tcs_out_lds_offsets, 0, 16),
			    LLVMConstInt(ctx->i32, 4, 0), "");
}

static LLVMValueRef
get_tcs_out_patch0_patch_data_offset(struct si_shader_context *ctx)
{
	return LLVMBuildMul(ctx->ac.builder,
			    si_unpack_param(ctx, ctx->tcs_out_lds_offsets, 16, 16),
			    LLVMConstInt(ctx->i32, 4, 0), "");
}

static LLVMValueRef
get_tcs_in_current_patch_offset(struct si_shader_context *ctx)
{
	LLVMValueRef patch_stride = get_tcs_in_patch_stride(ctx);
	LLVMValueRef rel_patch_id = get_rel_patch_id(ctx);

	return LLVMBuildMul(ctx->ac.builder, patch_stride, rel_patch_id, "");
}

static LLVMValueRef
get_tcs_out_current_patch_offset(struct si_shader_context *ctx)
{
	LLVMValueRef patch0_offset = get_tcs_out_patch0_offset(ctx);
	LLVMValueRef patch_stride = get_tcs_out_patch_stride(ctx);
	LLVMValueRef rel_patch_id = get_rel_patch_id(ctx);

	return ac_build_imad(&ctx->ac, patch_stride, rel_patch_id, patch0_offset);
}

static LLVMValueRef
get_tcs_out_current_patch_data_offset(struct si_shader_context *ctx)
{
	LLVMValueRef patch0_patch_data_offset =
		get_tcs_out_patch0_patch_data_offset(ctx);
	LLVMValueRef patch_stride = get_tcs_out_patch_stride(ctx);
	LLVMValueRef rel_patch_id = get_rel_patch_id(ctx);

	return ac_build_imad(&ctx->ac, patch_stride, rel_patch_id, patch0_patch_data_offset);
}
static LLVMValueRef get_num_tcs_out_vertices(struct si_shader_context *ctx)
{
	unsigned tcs_out_vertices =
		ctx->shader->selector ?
		ctx->shader->selector->info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT] : 0;

	/* If !tcs_out_vertices, it's either the fixed-func TCS or the TCS epilog. */
	if (ctx->type == PIPE_SHADER_TESS_CTRL && tcs_out_vertices)
		return LLVMConstInt(ctx->i32, tcs_out_vertices, 0);

	return si_unpack_param(ctx, ctx->tcs_offchip_layout, 6, 6);
}
static LLVMValueRef get_tcs_in_vertex_dw_stride(struct si_shader_context *ctx)
{
	unsigned stride;

	switch (ctx->type) {
	case PIPE_SHADER_VERTEX:
		stride = ctx->shader->selector->lshs_vertex_stride / 4;
		return LLVMConstInt(ctx->i32, stride, 0);

	case PIPE_SHADER_TESS_CTRL:
		if (ctx->screen->info.chip_class >= GFX9 &&
		    ctx->shader->is_monolithic) {
			stride = ctx->shader->key.part.tcs.ls->lshs_vertex_stride / 4;
			return LLVMConstInt(ctx->i32, stride, 0);
		}
		return si_unpack_param(ctx, ctx->vs_state_bits, 24, 8);

	default:
		assert(0);
		return NULL;
	}
}
static LLVMValueRef unpack_sint16(struct si_shader_context *ctx,
				  LLVMValueRef i32, unsigned index)
{
	if (index == 1)
		return LLVMBuildAShr(ctx->ac.builder, i32,
				     LLVMConstInt(ctx->i32, 16, 0), "");

	return LLVMBuildSExt(ctx->ac.builder,
			     LLVMBuildTrunc(ctx->ac.builder, i32,
					    ctx->i16, ""),
			     ctx->i32, "");
}
void si_llvm_load_input_vs(
	struct si_shader_context *ctx,
	unsigned input_index,
	LLVMValueRef out[4])
{
	const struct tgsi_shader_info *info = &ctx->shader->selector->info;
	unsigned vs_blit_property = info->properties[TGSI_PROPERTY_VS_BLIT_SGPRS_AMD];

	if (vs_blit_property) {
		LLVMValueRef vertex_id = ctx->abi.vertex_id;
		LLVMValueRef sel_x1 = LLVMBuildICmp(ctx->ac.builder,
						    LLVMIntULE, vertex_id,
						    ctx->i32_1, "");
		/* Use LLVMIntNE, because we have 3 vertices and only
		 * the middle one should use y2.
		 */
		LLVMValueRef sel_y1 = LLVMBuildICmp(ctx->ac.builder,
						    LLVMIntNE, vertex_id,
						    ctx->i32_1, "");

		unsigned param_vs_blit_inputs = ctx->vs_blit_inputs.arg_index;
		if (input_index == 0) {
			LLVMValueRef x1y1 = LLVMGetParam(ctx->main_fn,
							 param_vs_blit_inputs);
			LLVMValueRef x2y2 = LLVMGetParam(ctx->main_fn,
							 param_vs_blit_inputs + 1);

			LLVMValueRef x1 = unpack_sint16(ctx, x1y1, 0);
			LLVMValueRef y1 = unpack_sint16(ctx, x1y1, 1);
			LLVMValueRef x2 = unpack_sint16(ctx, x2y2, 0);
			LLVMValueRef y2 = unpack_sint16(ctx, x2y2, 1);

			LLVMValueRef x = LLVMBuildSelect(ctx->ac.builder, sel_x1,
							 x1, x2, "");
			LLVMValueRef y = LLVMBuildSelect(ctx->ac.builder, sel_y1,
							 y1, y2, "");

			out[0] = LLVMBuildSIToFP(ctx->ac.builder, x, ctx->f32, "");
			out[1] = LLVMBuildSIToFP(ctx->ac.builder, y, ctx->f32, "");
			out[2] = LLVMGetParam(ctx->main_fn,
					      param_vs_blit_inputs + 2);
			out[3] = ctx->ac.f32_1;
			return;
		}

		/* Color or texture coordinates: */
		assert(input_index == 1);

		if (vs_blit_property == SI_VS_BLIT_SGPRS_POS_COLOR) {
			for (int i = 0; i < 4; i++) {
				out[i] = LLVMGetParam(ctx->main_fn,
						      param_vs_blit_inputs + 3 + i);
			}
		} else {
			assert(vs_blit_property == SI_VS_BLIT_SGPRS_POS_TEXCOORD);
			LLVMValueRef x1 = LLVMGetParam(ctx->main_fn,
						       param_vs_blit_inputs + 3);
			LLVMValueRef y1 = LLVMGetParam(ctx->main_fn,
						       param_vs_blit_inputs + 4);
			LLVMValueRef x2 = LLVMGetParam(ctx->main_fn,
						       param_vs_blit_inputs + 5);
			LLVMValueRef y2 = LLVMGetParam(ctx->main_fn,
						       param_vs_blit_inputs + 6);

			out[0] = LLVMBuildSelect(ctx->ac.builder, sel_x1,
						 x1, x2, "");
			out[1] = LLVMBuildSelect(ctx->ac.builder, sel_y1,
						 y1, y2, "");
			out[2] = LLVMGetParam(ctx->main_fn,
					      param_vs_blit_inputs + 7);
			out[3] = LLVMGetParam(ctx->main_fn,
					      param_vs_blit_inputs + 8);
		}
		return;
	}

	union si_vs_fix_fetch fix_fetch;
	LLVMValueRef t_list_ptr;
	LLVMValueRef t_offset;
	LLVMValueRef t_list;
	LLVMValueRef vertex_index;
	LLVMValueRef tmp;

	/* Load the T list */
	t_list_ptr = ac_get_arg(&ctx->ac, ctx->vertex_buffers);

	t_offset = LLVMConstInt(ctx->i32, input_index, 0);

	t_list = ac_build_load_to_sgpr(&ctx->ac, t_list_ptr, t_offset);

	vertex_index = LLVMGetParam(ctx->main_fn,
				    ctx->vertex_index0.arg_index +
				    input_index);

	/* Use the open-coded implementation for all loads of doubles and
	 * of dword-sized data that needs fixups. We need to insert conversion
	 * code anyway, and the amd/common code does it for us.
	 *
	 * Note: On LLVM <= 8, we can only open-code formats with
	 * channel size >= 4 bytes.
	 */
	bool opencode = ctx->shader->key.mono.vs_fetch_opencode & (1 << input_index);
	fix_fetch.bits = ctx->shader->key.mono.vs_fix_fetch[input_index].bits;
	if (opencode ||
	    (fix_fetch.u.log_size == 3 && fix_fetch.u.format == AC_FETCH_FORMAT_FLOAT) ||
	    (fix_fetch.u.log_size == 2)) {
		tmp = ac_build_opencoded_load_format(
				&ctx->ac, fix_fetch.u.log_size, fix_fetch.u.num_channels_m1 + 1,
				fix_fetch.u.format, fix_fetch.u.reverse, !opencode,
				t_list, vertex_index, ctx->ac.i32_0, ctx->ac.i32_0, 0, true);
		for (unsigned i = 0; i < 4; ++i)
			out[i] = LLVMBuildExtractElement(ctx->ac.builder, tmp, LLVMConstInt(ctx->i32, i, false), "");
		return;
	}

	/* Do multiple loads for special formats. */
	unsigned required_channels = util_last_bit(info->input_usage_mask[input_index]);
	LLVMValueRef fetches[4];
	unsigned num_fetches;
	unsigned fetch_stride;
	unsigned channels_per_fetch;

	if (fix_fetch.u.log_size <= 1 && fix_fetch.u.num_channels_m1 == 2) {
		num_fetches = MIN2(required_channels, 3);
		fetch_stride = 1 << fix_fetch.u.log_size;
		channels_per_fetch = 1;
	} else {
		num_fetches = 1;
		fetch_stride = 0;
		channels_per_fetch = required_channels;
	}

	for (unsigned i = 0; i < num_fetches; ++i) {
		LLVMValueRef voffset = LLVMConstInt(ctx->i32, fetch_stride * i, 0);
		fetches[i] = ac_build_buffer_load_format(&ctx->ac, t_list, vertex_index, voffset,
							 channels_per_fetch, 0, true);
	}

	if (num_fetches == 1 && channels_per_fetch > 1) {
		LLVMValueRef fetch = fetches[0];
		for (unsigned i = 0; i < channels_per_fetch; ++i) {
			tmp = LLVMConstInt(ctx->i32, i, false);
			fetches[i] = LLVMBuildExtractElement(
				ctx->ac.builder, fetch, tmp, "");
		}
		num_fetches = channels_per_fetch;
		channels_per_fetch = 1;
	}

	for (unsigned i = num_fetches; i < 4; ++i)
		fetches[i] = LLVMGetUndef(ctx->f32);

	if (fix_fetch.u.log_size <= 1 && fix_fetch.u.num_channels_m1 == 2 &&
	    required_channels == 4) {
		if (fix_fetch.u.format == AC_FETCH_FORMAT_UINT || fix_fetch.u.format == AC_FETCH_FORMAT_SINT)
			fetches[3] = ctx->ac.i32_1;
		else
			fetches[3] = ctx->ac.f32_1;
	} else if (fix_fetch.u.log_size == 3 &&
		   (fix_fetch.u.format == AC_FETCH_FORMAT_SNORM ||
		    fix_fetch.u.format == AC_FETCH_FORMAT_SSCALED ||
		    fix_fetch.u.format == AC_FETCH_FORMAT_SINT) &&
		   required_channels == 4) {
		/* For 2_10_10_10, the hardware returns an unsigned value;
		 * convert it to a signed one.
		 */
		LLVMValueRef tmp = fetches[3];
		LLVMValueRef c30 = LLVMConstInt(ctx->i32, 30, 0);

		/* First, recover the sign-extended signed integer value. */
		if (fix_fetch.u.format == AC_FETCH_FORMAT_SSCALED)
			tmp = LLVMBuildFPToUI(ctx->ac.builder, tmp, ctx->i32, "");
		else
			tmp = ac_to_integer(&ctx->ac, tmp);

		/* For the integer-like cases, do a natural sign extension.
		 *
		 * For the SNORM case, the values are 0.0, 0.333, 0.666, 1.0
		 * and happen to contain 0, 1, 2, 3 as the two LSBs of the
		 * exponent.
		 */
		tmp = LLVMBuildShl(ctx->ac.builder, tmp,
				   fix_fetch.u.format == AC_FETCH_FORMAT_SNORM ?
				   LLVMConstInt(ctx->i32, 7, 0) : c30, "");
		tmp = LLVMBuildAShr(ctx->ac.builder, tmp, c30, "");

		/* Convert back to the right type. */
		if (fix_fetch.u.format == AC_FETCH_FORMAT_SNORM) {
			LLVMValueRef clamp;
			LLVMValueRef neg_one = LLVMConstReal(ctx->f32, -1.0);
			tmp = LLVMBuildSIToFP(ctx->ac.builder, tmp, ctx->f32, "");
			clamp = LLVMBuildFCmp(ctx->ac.builder, LLVMRealULT, tmp, neg_one, "");
			tmp = LLVMBuildSelect(ctx->ac.builder, clamp, neg_one, tmp, "");
		} else if (fix_fetch.u.format == AC_FETCH_FORMAT_SSCALED) {
			tmp = LLVMBuildSIToFP(ctx->ac.builder, tmp, ctx->f32, "");
		}

		fetches[3] = tmp;
	}

	for (unsigned i = 0; i < 4; ++i)
		out[i] = ac_to_float(&ctx->ac, fetches[i]);
}
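
/* Illustration of the 2_10_10_10 alpha fix-up above (not driver code):
 * the 2-bit alpha is sign-extended with a shift pair, e.g.
 *
 *   alpha = 3 (0b11): 3 << 30 = 0xC0000000, then >> 30 (arithmetic) = -1
 *   alpha = 1 (0b01): 1 << 30 = 0x40000000, then >> 30 (arithmetic) =  1
 *
 * For SNORM the 0..3 value sits in the low exponent bits of the float, so a
 * shift by 7 moves it into the top two bits before the same arithmetic shift.
 */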
static void declare_input_vs(
	struct si_shader_context *ctx,
	unsigned input_index,
	const struct tgsi_full_declaration *decl,
	LLVMValueRef out[4])
{
	si_llvm_load_input_vs(ctx, input_index, out);
}
LLVMValueRef si_get_primitive_id(struct si_shader_context *ctx,
				 unsigned swizzle)
{
	if (swizzle > 0)
		return ctx->i32_0;

	switch (ctx->type) {
	case PIPE_SHADER_VERTEX:
		return ac_get_arg(&ctx->ac, ctx->vs_prim_id);
	case PIPE_SHADER_TESS_CTRL:
		return ac_get_arg(&ctx->ac, ctx->args.tcs_patch_id);
	case PIPE_SHADER_TESS_EVAL:
		return ac_get_arg(&ctx->ac, ctx->args.tes_patch_id);
	case PIPE_SHADER_GEOMETRY:
		return ac_get_arg(&ctx->ac, ctx->args.gs_prim_id);
	default:
		assert(0);
		return ctx->i32_0;
	}
}
/**
 * Return the value of tgsi_ind_register for indexing.
 * This is the indirect index with the constant offset added to it.
 */
LLVMValueRef si_get_indirect_index(struct si_shader_context *ctx,
				   const struct tgsi_ind_register *ind,
				   unsigned addr_mul,
				   int rel_index)
{
	LLVMValueRef result;

	if (ind->File == TGSI_FILE_ADDRESS) {
		result = ctx->addrs[ind->Index][ind->Swizzle];
		result = LLVMBuildLoad(ctx->ac.builder, result, "");
	} else {
		struct tgsi_full_src_register src = {};

		src.Register.File = ind->File;
		src.Register.Index = ind->Index;

		/* Set the second index to 0 for constants. */
		if (ind->File == TGSI_FILE_CONSTANT)
			src.Register.Dimension = 1;

		result = ctx->bld_base.emit_fetch_funcs[ind->File](&ctx->bld_base, &src,
								   TGSI_TYPE_SIGNED,
								   ind->Swizzle);
		result = ac_to_integer(&ctx->ac, result);
	}

	return ac_build_imad(&ctx->ac, result, LLVMConstInt(ctx->i32, addr_mul, 0),
			     LLVMConstInt(ctx->i32, rel_index, 0));
}
/**
 * Like si_get_indirect_index, but restricts the return value to a (possibly
 * undefined) value inside [0..num).
 */
LLVMValueRef si_get_bounded_indirect_index(struct si_shader_context *ctx,
					   const struct tgsi_ind_register *ind,
					   int rel_index, unsigned num)
{
	LLVMValueRef result = si_get_indirect_index(ctx, ind, 1, rel_index);

	return si_llvm_bound_index(ctx, result, num);
}
static LLVMValueRef get_dw_address_from_generic_indices(struct si_shader_context *ctx,
							 LLVMValueRef vertex_dw_stride,
							 LLVMValueRef base_addr,
							 LLVMValueRef vertex_index,
							 LLVMValueRef param_index,
							 unsigned input_index,
							 ubyte *name,
							 ubyte *index,
							 bool is_patch)
{
	if (vertex_dw_stride) {
		base_addr = ac_build_imad(&ctx->ac, vertex_index,
					  vertex_dw_stride, base_addr);
	}

	if (param_index) {
		base_addr = ac_build_imad(&ctx->ac, param_index,
					  LLVMConstInt(ctx->i32, 4, 0), base_addr);
	}

	int param = is_patch ?
		si_shader_io_get_unique_index_patch(name[input_index],
						    index[input_index]) :
		si_shader_io_get_unique_index(name[input_index],
					      index[input_index], false);

	/* Add the base address of the element. */
	return LLVMBuildAdd(ctx->ac.builder, base_addr,
			    LLVMConstInt(ctx->i32, param * 4, 0), "");
}
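
/* Illustrative example of the dword addressing above: for a per-vertex
 * generic input whose unique slot is 5, vertex index 2, no indirect param
 * index, and a vertex stride of 16 dwords, the helper returns
 *
 *   base_addr + 2 * 16 + 5 * 4  =  base_addr + 52 dwords
 *
 * i.e. each slot occupies one vec4 (4 dwords) inside the per-vertex block.
 */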
/**
 * Calculate a dword address given an input or output register and a stride.
 */
static LLVMValueRef get_dw_address(struct si_shader_context *ctx,
				   const struct tgsi_full_dst_register *dst,
				   const struct tgsi_full_src_register *src,
				   LLVMValueRef vertex_dw_stride,
				   LLVMValueRef base_addr)
{
	struct tgsi_shader_info *info = &ctx->shader->selector->info;
	ubyte *name, *index, *array_first;
	int input_index;
	struct tgsi_full_dst_register reg;
	LLVMValueRef vertex_index = NULL;
	LLVMValueRef ind_index = NULL;

	/* Set the register description. The address computation is the same
	 * for sources and destinations. */
	if (src) {
		reg.Register.File = src->Register.File;
		reg.Register.Index = src->Register.Index;
		reg.Register.Indirect = src->Register.Indirect;
		reg.Register.Dimension = src->Register.Dimension;
		reg.Indirect = src->Indirect;
		reg.Dimension = src->Dimension;
		reg.DimIndirect = src->DimIndirect;
	} else
		reg = *dst;

	/* If the register is 2-dimensional (e.g. an array of vertices
	 * in a primitive), calculate the base address of the vertex. */
	if (reg.Register.Dimension) {
		if (reg.Dimension.Indirect)
			vertex_index = si_get_indirect_index(ctx, &reg.DimIndirect,
							     1, reg.Dimension.Index);
		else
			vertex_index = LLVMConstInt(ctx->i32, reg.Dimension.Index, 0);
	}

	/* Get information about the register. */
	if (reg.Register.File == TGSI_FILE_INPUT) {
		name = info->input_semantic_name;
		index = info->input_semantic_index;
		array_first = info->input_array_first;
	} else if (reg.Register.File == TGSI_FILE_OUTPUT) {
		name = info->output_semantic_name;
		index = info->output_semantic_index;
		array_first = info->output_array_first;
	} else {
		assert(0);
		return NULL;
	}

	if (reg.Register.Indirect) {
		/* Add the relative address of the element. */
		if (reg.Indirect.ArrayID)
			input_index = array_first[reg.Indirect.ArrayID];
		else
			input_index = reg.Register.Index;

		ind_index = si_get_indirect_index(ctx, &reg.Indirect,
						  1, reg.Register.Index - input_index);
	} else {
		input_index = reg.Register.Index;
	}

	return get_dw_address_from_generic_indices(ctx, vertex_dw_stride,
						   base_addr, vertex_index,
						   ind_index, input_index,
						   name, index,
						   !reg.Register.Dimension);
}
/* The offchip buffer layout for TCS->TES is
 *
 * - attribute 0 of patch 0 vertex 0
 * - attribute 0 of patch 0 vertex 1
 * - attribute 0 of patch 0 vertex 2
 *   ...
 * - attribute 0 of patch 1 vertex 0
 * - attribute 0 of patch 1 vertex 1
 *   ...
 * - attribute 1 of patch 0 vertex 0
 * - attribute 1 of patch 0 vertex 1
 *   ...
 * - per patch attribute 0 of patch 0
 * - per patch attribute 0 of patch 1
 *   ...
 *
 * Note that every attribute has 4 components.
 */
static LLVMValueRef get_tcs_tes_buffer_address(struct si_shader_context *ctx,
					       LLVMValueRef rel_patch_id,
					       LLVMValueRef vertex_index,
					       LLVMValueRef param_index)
{
	LLVMValueRef base_addr, vertices_per_patch, num_patches, total_vertices;
	LLVMValueRef param_stride, constant16;

	vertices_per_patch = get_num_tcs_out_vertices(ctx);
	num_patches = si_unpack_param(ctx, ctx->tcs_offchip_layout, 0, 6);
	total_vertices = LLVMBuildMul(ctx->ac.builder, vertices_per_patch,
				      num_patches, "");

	constant16 = LLVMConstInt(ctx->i32, 16, 0);
	if (vertex_index) {
		base_addr = ac_build_imad(&ctx->ac, rel_patch_id,
					  vertices_per_patch, vertex_index);
		param_stride = total_vertices;
	} else {
		base_addr = rel_patch_id;
		param_stride = num_patches;
	}

	base_addr = ac_build_imad(&ctx->ac, param_index, param_stride, base_addr);
	base_addr = LLVMBuildMul(ctx->ac.builder, base_addr, constant16, "");

	if (!vertex_index) {
		LLVMValueRef patch_data_offset =
			si_unpack_param(ctx, ctx->tcs_offchip_layout, 12, 20);

		base_addr = LLVMBuildAdd(ctx->ac.builder, base_addr,
					 patch_data_offset, "");
	}
	return base_addr;
}
835 static LLVMValueRef
get_tcs_tes_buffer_address_from_generic_indices(
836 struct si_shader_context
*ctx
,
837 LLVMValueRef vertex_index
,
838 LLVMValueRef param_index
,
844 unsigned param_index_base
;
846 param_index_base
= is_patch
?
847 si_shader_io_get_unique_index_patch(name
[param_base
], index
[param_base
]) :
848 si_shader_io_get_unique_index(name
[param_base
], index
[param_base
], false);
851 param_index
= LLVMBuildAdd(ctx
->ac
.builder
, param_index
,
852 LLVMConstInt(ctx
->i32
, param_index_base
, 0),
855 param_index
= LLVMConstInt(ctx
->i32
, param_index_base
, 0);
858 return get_tcs_tes_buffer_address(ctx
, get_rel_patch_id(ctx
),
859 vertex_index
, param_index
);
static LLVMValueRef get_tcs_tes_buffer_address_from_reg(
				struct si_shader_context *ctx,
				const struct tgsi_full_dst_register *dst,
				const struct tgsi_full_src_register *src)
{
	struct tgsi_shader_info *info = &ctx->shader->selector->info;
	ubyte *name, *index, *array_first;
	struct tgsi_full_src_register reg;
	LLVMValueRef vertex_index = NULL;
	LLVMValueRef param_index = NULL;
	unsigned param_base;

	reg = src ? *src : tgsi_full_src_register_from_dst(dst);

	if (reg.Register.Dimension) {
		if (reg.Dimension.Indirect)
			vertex_index = si_get_indirect_index(ctx, &reg.DimIndirect,
							     1, reg.Dimension.Index);
		else
			vertex_index = LLVMConstInt(ctx->i32, reg.Dimension.Index, 0);
	}

	/* Get information about the register. */
	if (reg.Register.File == TGSI_FILE_INPUT) {
		name = info->input_semantic_name;
		index = info->input_semantic_index;
		array_first = info->input_array_first;
	} else if (reg.Register.File == TGSI_FILE_OUTPUT) {
		name = info->output_semantic_name;
		index = info->output_semantic_index;
		array_first = info->output_array_first;
	} else {
		assert(0);
		return NULL;
	}

	if (reg.Register.Indirect) {
		if (reg.Indirect.ArrayID)
			param_base = array_first[reg.Indirect.ArrayID];
		else
			param_base = reg.Register.Index;

		param_index = si_get_indirect_index(ctx, &reg.Indirect,
						    1, reg.Register.Index - param_base);
	} else {
		param_base = reg.Register.Index;
	}

	return get_tcs_tes_buffer_address_from_generic_indices(ctx, vertex_index,
							       param_index, param_base,
							       name, index, !reg.Register.Dimension);
}
static LLVMValueRef buffer_load(struct lp_build_tgsi_context *bld_base,
				LLVMTypeRef type, unsigned swizzle,
				LLVMValueRef buffer, LLVMValueRef offset,
				LLVMValueRef base, bool can_speculate)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	LLVMValueRef value, value2;
	LLVMTypeRef vec_type = LLVMVectorType(type, 4);

	if (swizzle == ~0) {
		value = ac_build_buffer_load(&ctx->ac, buffer, 4, NULL, base, offset,
					     0, ac_glc, can_speculate, false);

		return LLVMBuildBitCast(ctx->ac.builder, value, vec_type, "");
	}

	if (!llvm_type_is_64bit(ctx, type)) {
		value = ac_build_buffer_load(&ctx->ac, buffer, 4, NULL, base, offset,
					     0, ac_glc, can_speculate, false);

		value = LLVMBuildBitCast(ctx->ac.builder, value, vec_type, "");
		return LLVMBuildExtractElement(ctx->ac.builder, value,
					       LLVMConstInt(ctx->i32, swizzle, 0), "");
	}

	value = ac_build_buffer_load(&ctx->ac, buffer, 1, NULL, base, offset,
				     swizzle * 4, ac_glc, can_speculate, false);

	value2 = ac_build_buffer_load(&ctx->ac, buffer, 1, NULL, base, offset,
				      swizzle * 4 + 4, ac_glc, can_speculate, false);

	return si_llvm_emit_fetch_64bit(bld_base, type, value, value2);
}
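
/* Note (illustrative): 64-bit channels don't fit a single dword fetch, so the
 * path above issues two single-dword loads at byte offsets swizzle*4 and
 * swizzle*4 + 4 and lets si_llvm_emit_fetch_64bit() recombine them; 32-bit
 * channels instead load the whole vec4 once and extract the requested lane.
 */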
/**
 * Load from LSHS LDS storage.
 *
 * \param type		output value type
 * \param swizzle	offset (typically 0..3); it can be ~0, which loads a vec4
 * \param dw_addr	address in dwords
 */
static LLVMValueRef lshs_lds_load(struct lp_build_tgsi_context *bld_base,
				  LLVMTypeRef type, unsigned swizzle,
				  LLVMValueRef dw_addr)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	LLVMValueRef value;

	if (swizzle == ~0) {
		LLVMValueRef values[TGSI_NUM_CHANNELS];

		for (unsigned chan = 0; chan < TGSI_NUM_CHANNELS; chan++)
			values[chan] = lshs_lds_load(bld_base, type, chan, dw_addr);

		return ac_build_gather_values(&ctx->ac, values,
					      TGSI_NUM_CHANNELS);
	}

	/* Split 64-bit loads. */
	if (llvm_type_is_64bit(ctx, type)) {
		LLVMValueRef lo, hi;

		lo = lshs_lds_load(bld_base, ctx->i32, swizzle, dw_addr);
		hi = lshs_lds_load(bld_base, ctx->i32, swizzle + 1, dw_addr);
		return si_llvm_emit_fetch_64bit(bld_base, type, lo, hi);
	}

	dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr,
			       LLVMConstInt(ctx->i32, swizzle, 0), "");

	value = ac_lds_load(&ctx->ac, dw_addr);

	return LLVMBuildBitCast(ctx->ac.builder, value, type, "");
}

/**
 * Store to LSHS LDS storage.
 *
 * \param swizzle	offset (typically 0..3)
 * \param dw_addr	address in dwords
 * \param value		value to store
 */
static void lshs_lds_store(struct si_shader_context *ctx,
			   unsigned dw_offset_imm, LLVMValueRef dw_addr,
			   LLVMValueRef value)
{
	dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr,
			       LLVMConstInt(ctx->i32, dw_offset_imm, 0), "");

	ac_lds_store(&ctx->ac, dw_addr, value);
}
enum si_tess_ring {
	TCS_FACTOR_RING,
	TESS_OFFCHIP_RING_TCS,
	TESS_OFFCHIP_RING_TES,
};

static LLVMValueRef get_tess_ring_descriptor(struct si_shader_context *ctx,
					     enum si_tess_ring ring)
{
	LLVMBuilderRef builder = ctx->ac.builder;
	LLVMValueRef addr = ac_get_arg(&ctx->ac,
				       ring == TESS_OFFCHIP_RING_TES ?
				       ctx->tes_offchip_addr :
				       ctx->tcs_out_lds_layout);

	/* TCS only receives high 13 bits of the address. */
	if (ring == TESS_OFFCHIP_RING_TCS || ring == TCS_FACTOR_RING) {
		addr = LLVMBuildAnd(builder, addr,
				    LLVMConstInt(ctx->i32, 0xfff80000, 0), "");
	}

	if (ring == TCS_FACTOR_RING) {
		unsigned tf_offset = ctx->screen->tess_offchip_ring_size;
		addr = LLVMBuildAdd(builder, addr,
				    LLVMConstInt(ctx->i32, tf_offset, 0), "");
	}

	uint32_t rsrc3 = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
			 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
			 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
			 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);

	if (ctx->screen->info.chip_class >= GFX10)
		rsrc3 |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
			 S_008F0C_OOB_SELECT(3) |
			 S_008F0C_RESOURCE_LEVEL(1);
	else
		rsrc3 |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
			 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);

	LLVMValueRef desc[4];
	desc[0] = addr;
	desc[1] = LLVMConstInt(ctx->i32,
			       S_008F04_BASE_ADDRESS_HI(ctx->screen->info.address32_hi), 0);
	desc[2] = LLVMConstInt(ctx->i32, 0xffffffff, 0);
	desc[3] = LLVMConstInt(ctx->i32, rsrc3, false);

	return ac_build_gather_values(&ctx->ac, desc, 4);
}
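
/* Note (illustrative): the four dwords gathered above form a buffer resource
 * descriptor (V#): desc[0]/desc[1] carry the base address, desc[2] = 0xffffffff
 * makes the ring effectively unbounded, and desc[3] selects the data format and
 * channel swizzle, with GFX10 using the newer OOB_SELECT/RESOURCE_LEVEL encoding.
 */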
static LLVMValueRef fetch_input_tcs(
	struct lp_build_tgsi_context *bld_base,
	const struct tgsi_full_src_register *reg,
	enum tgsi_opcode_type type, unsigned swizzle_in)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	LLVMValueRef dw_addr, stride;
	unsigned swizzle = swizzle_in & 0xffff;
	stride = get_tcs_in_vertex_dw_stride(ctx);
	dw_addr = get_tcs_in_current_patch_offset(ctx);
	dw_addr = get_dw_address(ctx, NULL, reg, stride, dw_addr);

	return lshs_lds_load(bld_base, tgsi2llvmtype(bld_base, type), swizzle, dw_addr);
}

static LLVMValueRef si_nir_load_tcs_varyings(struct ac_shader_abi *abi,
					     LLVMTypeRef type,
					     LLVMValueRef vertex_index,
					     LLVMValueRef param_index,
					     unsigned const_index,
					     unsigned location,
					     unsigned driver_location,
					     unsigned component,
					     unsigned num_components,
					     bool is_patch,
					     bool is_compact,
					     bool load_input)
{
	struct si_shader_context *ctx = si_shader_context_from_abi(abi);
	struct tgsi_shader_info *info = &ctx->shader->selector->info;
	struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
	LLVMValueRef dw_addr, stride;

	driver_location = driver_location / 4;

	if (load_input) {
		stride = get_tcs_in_vertex_dw_stride(ctx);
		dw_addr = get_tcs_in_current_patch_offset(ctx);
	} else {
		if (is_patch) {
			stride = NULL;
			dw_addr = get_tcs_out_current_patch_data_offset(ctx);
		} else {
			stride = get_tcs_out_vertex_dw_stride(ctx);
			dw_addr = get_tcs_out_current_patch_offset(ctx);
		}
	}

	if (!param_index) {
		param_index = LLVMConstInt(ctx->i32, const_index, 0);
	}

	ubyte *names;
	ubyte *indices;
	if (load_input) {
		names = info->input_semantic_name;
		indices = info->input_semantic_index;
	} else {
		names = info->output_semantic_name;
		indices = info->output_semantic_index;
	}

	dw_addr = get_dw_address_from_generic_indices(ctx, stride, dw_addr,
						      vertex_index, param_index,
						      driver_location,
						      names, indices,
						      is_patch);

	LLVMValueRef value[4];
	for (unsigned i = 0; i < num_components; i++) {
		unsigned offset = i;
		if (llvm_type_is_64bit(ctx, type))
			offset *= 2;

		offset += component;
		value[i + component] = lshs_lds_load(bld_base, type, offset, dw_addr);
	}

	return ac_build_varying_gather_values(&ctx->ac, value, num_components, component);
}
static LLVMValueRef fetch_output_tcs(
		struct lp_build_tgsi_context *bld_base,
		const struct tgsi_full_src_register *reg,
		enum tgsi_opcode_type type, unsigned swizzle_in)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	LLVMValueRef dw_addr, stride;
	unsigned swizzle = (swizzle_in & 0xffff);

	if (reg->Register.Dimension) {
		stride = get_tcs_out_vertex_dw_stride(ctx);
		dw_addr = get_tcs_out_current_patch_offset(ctx);
		dw_addr = get_dw_address(ctx, NULL, reg, stride, dw_addr);
	} else {
		dw_addr = get_tcs_out_current_patch_data_offset(ctx);
		dw_addr = get_dw_address(ctx, NULL, reg, NULL, dw_addr);
	}

	return lshs_lds_load(bld_base, tgsi2llvmtype(bld_base, type), swizzle, dw_addr);
}

static LLVMValueRef fetch_input_tes(
		struct lp_build_tgsi_context *bld_base,
		const struct tgsi_full_src_register *reg,
		enum tgsi_opcode_type type, unsigned swizzle_in)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	LLVMValueRef base, addr;
	unsigned swizzle = (swizzle_in & 0xffff);

	base = ac_get_arg(&ctx->ac, ctx->tcs_offchip_offset);
	addr = get_tcs_tes_buffer_address_from_reg(ctx, NULL, reg);

	return buffer_load(bld_base, tgsi2llvmtype(bld_base, type), swizzle,
			   ctx->tess_offchip_ring, base, addr, true);
}

LLVMValueRef si_nir_load_input_tes(struct ac_shader_abi *abi,
				   LLVMTypeRef type,
				   LLVMValueRef vertex_index,
				   LLVMValueRef param_index,
				   unsigned const_index,
				   unsigned location,
				   unsigned driver_location,
				   unsigned component,
				   unsigned num_components,
				   bool is_patch,
				   bool is_compact,
				   bool load_input)
{
	struct si_shader_context *ctx = si_shader_context_from_abi(abi);
	struct tgsi_shader_info *info = &ctx->shader->selector->info;
	LLVMValueRef base, addr;

	driver_location = driver_location / 4;

	base = ac_get_arg(&ctx->ac, ctx->tcs_offchip_offset);

	if (!param_index) {
		param_index = LLVMConstInt(ctx->i32, const_index, 0);
	}

	addr = get_tcs_tes_buffer_address_from_generic_indices(ctx, vertex_index,
							       param_index, driver_location,
							       info->input_semantic_name,
							       info->input_semantic_index,
							       is_patch);

	/* TODO: This will generate rather ordinary llvm code, although it
	 * should be easy for the optimiser to fix up. In future we might want
	 * to refactor buffer_load(), but for now this maximises code sharing
	 * between the NIR and TGSI backends.
	 */
	LLVMValueRef value[4];
	for (unsigned i = 0; i < num_components; i++) {
		unsigned offset = i;
		if (llvm_type_is_64bit(ctx, type)) {
			offset *= 2;
			if (offset == 4) {
				addr = get_tcs_tes_buffer_address_from_generic_indices(ctx,
									vertex_index,
									param_index,
									driver_location + 1,
									info->input_semantic_name,
									info->input_semantic_index,
									is_patch);
			}

			offset = offset % 4;
		}

		offset += component;
		value[i + component] = buffer_load(&ctx->bld_base, type, offset,
						   ctx->tess_offchip_ring, base, addr, true);
	}

	return ac_build_varying_gather_values(&ctx->ac, value, num_components, component);
}
static void store_output_tcs(struct lp_build_tgsi_context *bld_base,
			     const struct tgsi_full_instruction *inst,
			     const struct tgsi_opcode_info *info,
			     unsigned index,
			     LLVMValueRef dst[4])
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	const struct tgsi_full_dst_register *reg = &inst->Dst[index];
	const struct tgsi_shader_info *sh_info = &ctx->shader->selector->info;
	unsigned chan_index;
	LLVMValueRef dw_addr, stride;
	LLVMValueRef buffer, base, buf_addr;
	LLVMValueRef values[4];
	bool skip_lds_store;
	bool is_tess_factor = false, is_tess_inner = false;

	/* Only handle per-patch and per-vertex outputs here.
	 * Vectors will be lowered to scalars and this function will be called again.
	 */
	if (reg->Register.File != TGSI_FILE_OUTPUT ||
	    (dst[0] && LLVMGetTypeKind(LLVMTypeOf(dst[0])) == LLVMVectorTypeKind)) {
		si_llvm_emit_store(bld_base, inst, info, index, dst);
		return;
	}

	if (reg->Register.Dimension) {
		stride = get_tcs_out_vertex_dw_stride(ctx);
		dw_addr = get_tcs_out_current_patch_offset(ctx);
		dw_addr = get_dw_address(ctx, reg, NULL, stride, dw_addr);
		skip_lds_store = !sh_info->reads_pervertex_outputs;
	} else {
		dw_addr = get_tcs_out_current_patch_data_offset(ctx);
		dw_addr = get_dw_address(ctx, reg, NULL, NULL, dw_addr);
		skip_lds_store = !sh_info->reads_perpatch_outputs;

		if (!reg->Register.Indirect) {
			int name = sh_info->output_semantic_name[reg->Register.Index];

			/* Always write tess factors into LDS for the TCS epilog. */
			if (name == TGSI_SEMANTIC_TESSINNER ||
			    name == TGSI_SEMANTIC_TESSOUTER) {
				/* The epilog doesn't read LDS if invocation 0 defines tess factors. */
				skip_lds_store = !sh_info->reads_tessfactor_outputs &&
						 ctx->shader->selector->tcs_info.tessfactors_are_def_in_all_invocs;
				is_tess_factor = true;
				is_tess_inner = name == TGSI_SEMANTIC_TESSINNER;
			}
		}
	}

	buffer = get_tess_ring_descriptor(ctx, TESS_OFFCHIP_RING_TCS);

	base = ac_get_arg(&ctx->ac, ctx->tcs_offchip_offset);
	buf_addr = get_tcs_tes_buffer_address_from_reg(ctx, reg, NULL);

	uint32_t writemask = reg->Register.WriteMask;
	while (writemask) {
		chan_index = u_bit_scan(&writemask);
		LLVMValueRef value = dst[chan_index];

		if (inst->Instruction.Saturate)
			value = ac_build_clamp(&ctx->ac, value);

		/* Skip LDS stores if there is no LDS read of this output. */
		if (!skip_lds_store)
			lshs_lds_store(ctx, chan_index, dw_addr, value);

		value = ac_to_integer(&ctx->ac, value);
		values[chan_index] = value;

		if (reg->Register.WriteMask != 0xF && !is_tess_factor) {
			ac_build_buffer_store_dword(&ctx->ac, buffer, value, 1,
						    buf_addr, base,
						    4 * chan_index, ac_glc, false);
		}

		/* Write tess factors into VGPRs for the epilog. */
		if (is_tess_factor &&
		    ctx->shader->selector->tcs_info.tessfactors_are_def_in_all_invocs) {
			if (!is_tess_inner) {
				LLVMBuildStore(ctx->ac.builder, value, /* outer */
					       ctx->invoc0_tess_factors[chan_index]);
			} else if (chan_index < 2) {
				LLVMBuildStore(ctx->ac.builder, value, /* inner */
					       ctx->invoc0_tess_factors[4 + chan_index]);
			}
		}
	}

	if (reg->Register.WriteMask == 0xF && !is_tess_factor) {
		LLVMValueRef value = ac_build_gather_values(&ctx->ac,
							    values, 4);
		ac_build_buffer_store_dword(&ctx->ac, buffer, value, 4, buf_addr,
					    base, 0, ac_glc, false);
	}
}
static void si_nir_store_output_tcs(struct ac_shader_abi *abi,
				    const struct nir_variable *var,
				    LLVMValueRef vertex_index,
				    LLVMValueRef param_index,
				    unsigned const_index,
				    LLVMValueRef src,
				    unsigned writemask)
{
	struct si_shader_context *ctx = si_shader_context_from_abi(abi);
	struct tgsi_shader_info *info = &ctx->shader->selector->info;
	const unsigned component = var->data.location_frac;
	const bool is_patch = var->data.patch;
	unsigned driver_location = var->data.driver_location;
	LLVMValueRef dw_addr, stride;
	LLVMValueRef buffer, base, addr;
	LLVMValueRef values[8];
	bool skip_lds_store;
	bool is_tess_factor = false, is_tess_inner = false;

	driver_location = driver_location / 4;

	bool is_const = !param_index;
	if (!param_index)
		param_index = LLVMConstInt(ctx->i32, const_index, 0);

	if (!is_patch) {
		stride = get_tcs_out_vertex_dw_stride(ctx);
		dw_addr = get_tcs_out_current_patch_offset(ctx);
		dw_addr = get_dw_address_from_generic_indices(ctx, stride, dw_addr,
							      vertex_index, param_index,
							      driver_location,
							      info->output_semantic_name,
							      info->output_semantic_index,
							      is_patch);

		skip_lds_store = !info->reads_pervertex_outputs;
	} else {
		dw_addr = get_tcs_out_current_patch_data_offset(ctx);
		dw_addr = get_dw_address_from_generic_indices(ctx, NULL, dw_addr,
							      vertex_index, param_index,
							      driver_location,
							      info->output_semantic_name,
							      info->output_semantic_index,
							      is_patch);

		skip_lds_store = !info->reads_perpatch_outputs;

		if (is_const && const_index == 0) {
			int name = info->output_semantic_name[driver_location];

			/* Always write tess factors into LDS for the TCS epilog. */
			if (name == TGSI_SEMANTIC_TESSINNER ||
			    name == TGSI_SEMANTIC_TESSOUTER) {
				/* The epilog doesn't read LDS if invocation 0 defines tess factors. */
				skip_lds_store = !info->reads_tessfactor_outputs &&
						 ctx->shader->selector->tcs_info.tessfactors_are_def_in_all_invocs;
				is_tess_factor = true;
				is_tess_inner = name == TGSI_SEMANTIC_TESSINNER;
			}
		}
	}

	buffer = get_tess_ring_descriptor(ctx, TESS_OFFCHIP_RING_TCS);

	base = ac_get_arg(&ctx->ac, ctx->tcs_offchip_offset);

	addr = get_tcs_tes_buffer_address_from_generic_indices(ctx, vertex_index,
							       param_index, driver_location,
							       info->output_semantic_name,
							       info->output_semantic_index,
							       is_patch);

	for (unsigned chan = 0; chan < 8; chan++) {
		if (!(writemask & (1 << chan)))
			continue;
		LLVMValueRef value = ac_llvm_extract_elem(&ctx->ac, src, chan - component);

		unsigned buffer_store_offset = chan % 4;
		if (chan == 4) {
			addr = get_tcs_tes_buffer_address_from_generic_indices(ctx,
							       vertex_index,
							       param_index,
							       driver_location + 1,
							       info->output_semantic_name,
							       info->output_semantic_index,
							       is_patch);
		}

		/* Skip LDS stores if there is no LDS read of this output. */
		if (!skip_lds_store)
			lshs_lds_store(ctx, chan, dw_addr, value);

		value = ac_to_integer(&ctx->ac, value);
		values[chan] = value;

		if (writemask != 0xF && !is_tess_factor) {
			ac_build_buffer_store_dword(&ctx->ac, buffer, value, 1,
						    addr, base,
						    4 * buffer_store_offset,
						    ac_glc, false);
		}

		/* Write tess factors into VGPRs for the epilog. */
		if (is_tess_factor &&
		    ctx->shader->selector->tcs_info.tessfactors_are_def_in_all_invocs) {
			if (!is_tess_inner) {
				LLVMBuildStore(ctx->ac.builder, value, /* outer */
					       ctx->invoc0_tess_factors[chan]);
			} else if (chan < 2) {
				LLVMBuildStore(ctx->ac.builder, value, /* inner */
					       ctx->invoc0_tess_factors[4 + chan]);
			}
		}
	}

	if (writemask == 0xF && !is_tess_factor) {
		LLVMValueRef value = ac_build_gather_values(&ctx->ac,
							    values, 4);
		ac_build_buffer_store_dword(&ctx->ac, buffer, value, 4, addr,
					    base, 0, ac_glc, false);
	}
}
LLVMValueRef si_llvm_load_input_gs(struct ac_shader_abi *abi,
				   unsigned input_index,
				   unsigned vtx_offset_param,
				   LLVMTypeRef type,
				   unsigned swizzle)
{
	struct si_shader_context *ctx = si_shader_context_from_abi(abi);
	struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
	struct si_shader *shader = ctx->shader;
	LLVMValueRef vtx_offset, soffset;
	struct tgsi_shader_info *info = &shader->selector->info;
	unsigned semantic_name = info->input_semantic_name[input_index];
	unsigned semantic_index = info->input_semantic_index[input_index];
	unsigned param;
	LLVMValueRef value;

	param = si_shader_io_get_unique_index(semantic_name, semantic_index, false);

	/* GFX9 has the ESGS ring in LDS. */
	if (ctx->screen->info.chip_class >= GFX9) {
		unsigned index = vtx_offset_param;

		switch (index / 2) {
		case 0:
			vtx_offset = si_unpack_param(ctx, ctx->gs_vtx01_offset,
						     index % 2 ? 16 : 0, 16);
			break;
		case 1:
			vtx_offset = si_unpack_param(ctx, ctx->gs_vtx23_offset,
						     index % 2 ? 16 : 0, 16);
			break;
		case 2:
			vtx_offset = si_unpack_param(ctx, ctx->gs_vtx45_offset,
						     index % 2 ? 16 : 0, 16);
			break;
		default:
			assert(0);
			return NULL;
		}

		unsigned offset = param * 4 + swizzle;
		vtx_offset = LLVMBuildAdd(ctx->ac.builder, vtx_offset,
					  LLVMConstInt(ctx->i32, offset, false), "");

		LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->esgs_ring, vtx_offset);
		LLVMValueRef value = LLVMBuildLoad(ctx->ac.builder, ptr, "");
		if (llvm_type_is_64bit(ctx, type)) {
			ptr = LLVMBuildGEP(ctx->ac.builder, ptr,
					   &ctx->ac.i32_1, 1, "");
			LLVMValueRef values[2] = {
				value,
				LLVMBuildLoad(ctx->ac.builder, ptr, "")
			};
			value = ac_build_gather_values(&ctx->ac, values, 2);
		}
		return LLVMBuildBitCast(ctx->ac.builder, value, type, "");
	}

	/* GFX6: input load from the ESGS ring in memory. */
	if (swizzle == ~0) {
		LLVMValueRef values[TGSI_NUM_CHANNELS];
		unsigned chan;
		for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
			values[chan] = si_llvm_load_input_gs(abi, input_index, vtx_offset_param,
							     type, chan);
		}
		return ac_build_gather_values(&ctx->ac, values,
					      TGSI_NUM_CHANNELS);
	}

	/* Get the vertex offset parameter on GFX6. */
	LLVMValueRef gs_vtx_offset = ac_get_arg(&ctx->ac,
						ctx->gs_vtx_offset[vtx_offset_param]);

	vtx_offset = LLVMBuildMul(ctx->ac.builder, gs_vtx_offset,
				  LLVMConstInt(ctx->i32, 4, 0), "");

	soffset = LLVMConstInt(ctx->i32, (param * 4 + swizzle) * 256, 0);

	value = ac_build_buffer_load(&ctx->ac, ctx->esgs_ring, 1, ctx->i32_0,
				     vtx_offset, soffset, 0, ac_glc, true, false);
	if (llvm_type_is_64bit(ctx, type)) {
		LLVMValueRef value2;
		soffset = LLVMConstInt(ctx->i32, (param * 4 + swizzle + 1) * 256, 0);

		value2 = ac_build_buffer_load(&ctx->ac, ctx->esgs_ring, 1,
					      ctx->i32_0, vtx_offset, soffset,
					      0, ac_glc, true, false);
		return si_llvm_emit_fetch_64bit(bld_base, type, value, value2);
	}
	return LLVMBuildBitCast(ctx->ac.builder, value, type, "");
}

static LLVMValueRef si_nir_load_input_gs(struct ac_shader_abi *abi,
					 unsigned location,
					 unsigned driver_location,
					 unsigned component,
					 unsigned num_components,
					 unsigned vertex_index,
					 unsigned const_index,
					 LLVMTypeRef type)
{
	struct si_shader_context *ctx = si_shader_context_from_abi(abi);

	LLVMValueRef value[4];
	for (unsigned i = 0; i < num_components; i++) {
		unsigned offset = i;
		if (llvm_type_is_64bit(ctx, type))
			offset *= 2;

		offset += component;
		value[i + component] = si_llvm_load_input_gs(&ctx->abi, driver_location / 4 + const_index,
							     vertex_index, type, offset);
	}

	return ac_build_varying_gather_values(&ctx->ac, value, num_components, component);
}

static LLVMValueRef fetch_input_gs(
	struct lp_build_tgsi_context *bld_base,
	const struct tgsi_full_src_register *reg,
	enum tgsi_opcode_type type,
	unsigned swizzle_in)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	struct tgsi_shader_info *info = &ctx->shader->selector->info;
	unsigned swizzle = swizzle_in & 0xffff;

	unsigned semantic_name = info->input_semantic_name[reg->Register.Index];
	if (swizzle != ~0 && semantic_name == TGSI_SEMANTIC_PRIMID)
		return si_get_primitive_id(ctx, swizzle);

	if (!reg->Register.Dimension)
		return NULL;

	return si_llvm_load_input_gs(&ctx->abi, reg->Register.Index,
				     reg->Dimension.Index,
				     tgsi2llvmtype(bld_base, type),
				     swizzle);
}

static int lookup_interp_param_index(unsigned interpolate, unsigned location)
{
	switch (interpolate) {
	case TGSI_INTERPOLATE_CONSTANT:
		return 0;

	case TGSI_INTERPOLATE_LINEAR:
		if (location == TGSI_INTERPOLATE_LOC_SAMPLE)
			return SI_PARAM_LINEAR_SAMPLE;
		else if (location == TGSI_INTERPOLATE_LOC_CENTROID)
			return SI_PARAM_LINEAR_CENTROID;
		else
			return SI_PARAM_LINEAR_CENTER;

	case TGSI_INTERPOLATE_COLOR:
	case TGSI_INTERPOLATE_PERSPECTIVE:
		if (location == TGSI_INTERPOLATE_LOC_SAMPLE)
			return SI_PARAM_PERSP_SAMPLE;
		else if (location == TGSI_INTERPOLATE_LOC_CENTROID)
			return SI_PARAM_PERSP_CENTROID;
		else
			return SI_PARAM_PERSP_CENTER;

	default:
		fprintf(stderr, "Warning: Unhandled interpolation mode.\n");
		return -1;
	}
}

static LLVMValueRef si_build_fs_interp(struct si_shader_context *ctx,
				       unsigned attr_index, unsigned chan,
				       LLVMValueRef prim_mask,
				       LLVMValueRef i, LLVMValueRef j)
{
	if (i) {
		return ac_build_fs_interp(&ctx->ac,
					  LLVMConstInt(ctx->i32, chan, 0),
					  LLVMConstInt(ctx->i32, attr_index, 0),
					  prim_mask, i, j);
	}
	return ac_build_fs_interp_mov(&ctx->ac,
				      LLVMConstInt(ctx->i32, 2, 0), /* P0 */
				      LLVMConstInt(ctx->i32, chan, 0),
				      LLVMConstInt(ctx->i32, attr_index, 0),
				      prim_mask);
}

/**
 * Interpolate a fragment shader input.
 *
 * @param ctx		context
 * @param input_index		index of the input in hardware
 * @param semantic_name		TGSI_SEMANTIC_*
 * @param semantic_index	semantic index
 * @param num_interp_inputs	number of all interpolated inputs (= BCOLOR offset)
 * @param colors_read_mask	color components read (4 bits for each color, 8 bits in total)
 * @param interp_param		interpolation weights (i,j)
 * @param prim_mask		SI_PARAM_PRIM_MASK
 * @param face			SI_PARAM_FRONT_FACE
 * @param result		the return value (4 components)
 */
static void interp_fs_input(struct si_shader_context *ctx,
			    unsigned input_index,
			    unsigned semantic_name,
			    unsigned semantic_index,
			    unsigned num_interp_inputs,
			    unsigned colors_read_mask,
			    LLVMValueRef interp_param,
			    LLVMValueRef prim_mask,
			    LLVMValueRef face,
			    LLVMValueRef result[4])
{
	LLVMValueRef i = NULL, j = NULL;
	unsigned chan;

	/* fs.constant returns the param from the middle vertex, so it's not
	 * really useful for flat shading. It's meant to be used for custom
	 * interpolation (but the intrinsic can't fetch from the other two
	 * vertices).
	 *
	 * Luckily, it doesn't matter, because we rely on the FLAT_SHADE state
	 * to do the right thing. The only reason we use fs.constant is that
	 * fs.interp cannot be used on integers, because they can be equal
	 * to NaN.
	 *
	 * When interp is false we will use fs.constant or for newer llvm,
	 * amdgcn.interp.mov.
	 */
	bool interp = interp_param != NULL;

	if (interp) {
		interp_param = LLVMBuildBitCast(ctx->ac.builder, interp_param,
						LLVMVectorType(ctx->f32, 2), "");

		i = LLVMBuildExtractElement(ctx->ac.builder, interp_param,
					    ctx->i32_0, "");
		j = LLVMBuildExtractElement(ctx->ac.builder, interp_param,
					    ctx->i32_1, "");
	}

	if (semantic_name == TGSI_SEMANTIC_COLOR &&
	    ctx->shader->key.part.ps.prolog.color_two_side) {
		LLVMValueRef is_face_positive;

		/* If BCOLOR0 is used, BCOLOR1 is at offset "num_inputs + 1",
		 * otherwise it's at offset "num_inputs".
		 */
		unsigned back_attr_offset = num_interp_inputs;
		if (semantic_index == 1 && colors_read_mask & 0xf)
			back_attr_offset += 1;

		is_face_positive = LLVMBuildICmp(ctx->ac.builder, LLVMIntNE,
						 face, ctx->i32_0, "");

		for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
			LLVMValueRef front, back;

			front = si_build_fs_interp(ctx,
						   input_index, chan,
						   prim_mask, i, j);
			back = si_build_fs_interp(ctx,
						  back_attr_offset, chan,
						  prim_mask, i, j);

			result[chan] = LLVMBuildSelect(ctx->ac.builder,
						       is_face_positive,
						       front,
						       back,
						       "");
		}
	} else if (semantic_name == TGSI_SEMANTIC_FOG) {
		result[0] = si_build_fs_interp(ctx, input_index,
					       0, prim_mask, i, j);
		result[1] = LLVMConstReal(ctx->f32, 0.0f);
		result[2] = LLVMConstReal(ctx->f32, 0.0f);
		result[3] = LLVMConstReal(ctx->f32, 1.0f);
	} else {
		for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
			result[chan] = si_build_fs_interp(ctx,
							  input_index, chan,
							  prim_mask, i, j);
		}
	}
}
void si_llvm_load_input_fs(
	struct si_shader_context *ctx,
	unsigned input_index,
	LLVMValueRef out[4])
{
	struct si_shader *shader = ctx->shader;
	struct tgsi_shader_info *info = &shader->selector->info;
	LLVMValueRef main_fn = ctx->main_fn;
	LLVMValueRef interp_param = NULL;
	int interp_param_idx;
	enum tgsi_semantic semantic_name = info->input_semantic_name[input_index];
	unsigned semantic_index = info->input_semantic_index[input_index];
	enum tgsi_interpolate_mode interp_mode = info->input_interpolate[input_index];
	enum tgsi_interpolate_loc interp_loc = info->input_interpolate_loc[input_index];

	/* Get colors from input VGPRs (set by the prolog). */
	if (semantic_name == TGSI_SEMANTIC_COLOR) {
		unsigned colors_read = shader->selector->info.colors_read;
		unsigned mask = colors_read >> (semantic_index * 4);
		unsigned offset = SI_PARAM_POS_FIXED_PT + 1 +
				  (semantic_index ? util_bitcount(colors_read & 0xf) : 0);
		LLVMValueRef undef = LLVMGetUndef(ctx->f32);

		out[0] = mask & 0x1 ? LLVMGetParam(main_fn, offset++) : undef;
		out[1] = mask & 0x2 ? LLVMGetParam(main_fn, offset++) : undef;
		out[2] = mask & 0x4 ? LLVMGetParam(main_fn, offset++) : undef;
		out[3] = mask & 0x8 ? LLVMGetParam(main_fn, offset++) : undef;
		return;
	}

	interp_param_idx = lookup_interp_param_index(interp_mode, interp_loc);
	if (interp_param_idx == -1)
		return;
	else if (interp_param_idx) {
		interp_param = LLVMGetParam(ctx->main_fn, interp_param_idx);
	}

	interp_fs_input(ctx, input_index, semantic_name,
			semantic_index, 0, /* this param is unused */
			shader->selector->info.colors_read, interp_param,
			ac_get_arg(&ctx->ac, ctx->args.prim_mask),
			LLVMGetParam(main_fn, SI_PARAM_FRONT_FACE),
			&out[0]);
}

static void declare_input_fs(
	struct si_shader_context *ctx,
	unsigned input_index,
	const struct tgsi_full_declaration *decl,
	LLVMValueRef out[4])
{
	si_llvm_load_input_fs(ctx, input_index, out);
}
1799 LLVMValueRef
si_get_sample_id(struct si_shader_context
*ctx
)
1801 return si_unpack_param(ctx
, ctx
->args
.ancillary
, 8, 4);
1804 static LLVMValueRef
get_base_vertex(struct ac_shader_abi
*abi
)
1806 struct si_shader_context
*ctx
= si_shader_context_from_abi(abi
);
1808 /* For non-indexed draws, the base vertex set by the driver
1809 * (for direct draws) or the CP (for indirect draws) is the
1810 * first vertex ID, but GLSL expects 0 to be returned.
1812 LLVMValueRef vs_state
= ac_get_arg(&ctx
->ac
,
1813 ctx
->vs_state_bits
);
1814 LLVMValueRef indexed
;
1816 indexed
= LLVMBuildLShr(ctx
->ac
.builder
, vs_state
, ctx
->i32_1
, "");
1817 indexed
= LLVMBuildTrunc(ctx
->ac
.builder
, indexed
, ctx
->i1
, "");
1819 return LLVMBuildSelect(ctx
->ac
.builder
, indexed
,
1820 ac_get_arg(&ctx
->ac
, ctx
->args
.base_vertex
),
1824 static LLVMValueRef
get_block_size(struct ac_shader_abi
*abi
)
1826 struct si_shader_context
*ctx
= si_shader_context_from_abi(abi
);
1828 LLVMValueRef values
[3];
1829 LLVMValueRef result
;
1831 unsigned *properties
= ctx
->shader
->selector
->info
.properties
;
1833 if (properties
[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH
] != 0) {
1834 unsigned sizes
[3] = {
1835 properties
[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH
],
1836 properties
[TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT
],
1837 properties
[TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH
]
1840 for (i
= 0; i
< 3; ++i
)
1841 values
[i
] = LLVMConstInt(ctx
->i32
, sizes
[i
], 0);
1843 result
= ac_build_gather_values(&ctx
->ac
, values
, 3);
1845 result
= ac_get_arg(&ctx
->ac
, ctx
->block_size
);
/**
 * Load a dword from a constant buffer.
 */
static LLVMValueRef buffer_load_const(struct si_shader_context *ctx,
				      LLVMValueRef resource,
				      LLVMValueRef offset)
{
	return ac_build_buffer_load(&ctx->ac, resource, 1, NULL, offset, NULL,
				    0, 0, true, true);
}
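
/* Added note: loads the xy position of the given sample from the
 * driver-provided SI_PS_CONST_SAMPLE_POSITIONS constant buffer (two floats
 * per sample); z and w are returned as 0.
 */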
static LLVMValueRef load_sample_position(struct ac_shader_abi *abi, LLVMValueRef sample_id)
{
	struct si_shader_context *ctx = si_shader_context_from_abi(abi);
	LLVMValueRef desc = ac_get_arg(&ctx->ac, ctx->rw_buffers);
	LLVMValueRef buf_index = LLVMConstInt(ctx->i32, SI_PS_CONST_SAMPLE_POSITIONS, 0);
	LLVMValueRef resource = ac_build_load_to_sgpr(&ctx->ac, desc, buf_index);

	/* offset = sample_id * 8  (8 = 2 floats containing samplepos.xy) */
	LLVMValueRef offset0 = LLVMBuildMul(ctx->ac.builder, sample_id, LLVMConstInt(ctx->i32, 8, 0), "");
	LLVMValueRef offset1 = LLVMBuildAdd(ctx->ac.builder, offset0, LLVMConstInt(ctx->i32, 4, 0), "");

	LLVMValueRef pos[4] = {
		buffer_load_const(ctx, resource, offset0),
		buffer_load_const(ctx, resource, offset1),
		LLVMConstReal(ctx->f32, 0),
		LLVMConstReal(ctx->f32, 0)
	};

	return ac_build_gather_values(&ctx->ac, pos, 4);
}
static LLVMValueRef load_sample_mask_in(struct ac_shader_abi *abi)
{
	struct si_shader_context *ctx = si_shader_context_from_abi(abi);
	return ac_to_integer(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args.sample_coverage));
}
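
/* Added note: returns the tessellation coordinate (u, v, 0, 0); for the
 * triangle domain the third component is computed as 1 - u - v below.
 */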
static LLVMValueRef si_load_tess_coord(struct ac_shader_abi *abi)
{
	struct si_shader_context *ctx = si_shader_context_from_abi(abi);
	LLVMValueRef coord[4] = {
		ac_get_arg(&ctx->ac, ctx->tes_u),
		ac_get_arg(&ctx->ac, ctx->tes_v),
		ctx->ac.f32_0,
		ctx->ac.f32_0
	};

	/* For triangles, the vector should be (u, v, 1-u-v). */
	if (ctx->shader->selector->info.properties[TGSI_PROPERTY_TES_PRIM_MODE] ==
	    PIPE_PRIM_TRIANGLES) {
		coord[2] = LLVMBuildFSub(ctx->ac.builder, ctx->ac.f32_1,
					 LLVMBuildFAdd(ctx->ac.builder,
						       coord[0], coord[1], ""), "");
	}

	return ac_build_gather_values(&ctx->ac, coord, 4);
}
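
/* Added note: reads back a TESSINNER/TESSOUTER level of the current patch
 * from the off-chip tessellation ring buffer written by the TCS.
 */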
static LLVMValueRef load_tess_level(struct si_shader_context *ctx,
				    unsigned semantic_name)
{
	LLVMValueRef base, addr;

	int param = si_shader_io_get_unique_index_patch(semantic_name, 0);

	base = ac_get_arg(&ctx->ac, ctx->tcs_offchip_offset);
	addr = get_tcs_tes_buffer_address(ctx, get_rel_patch_id(ctx), NULL,
					  LLVMConstInt(ctx->i32, param, 0));

	return buffer_load(&ctx->bld_base, ctx->f32,
			   ~0, ctx->tess_offchip_ring, base, addr, true);
}
static LLVMValueRef load_tess_level_default(struct si_shader_context *ctx,
					    unsigned semantic_name)
{
	LLVMValueRef buf, slot, val[4];
	int i, offset;

	slot = LLVMConstInt(ctx->i32, SI_HS_CONST_DEFAULT_TESS_LEVELS, 0);
	buf = ac_get_arg(&ctx->ac, ctx->rw_buffers);
	buf = ac_build_load_to_sgpr(&ctx->ac, buf, slot);
	offset = semantic_name == TGSI_SEMANTIC_TESS_DEFAULT_INNER_LEVEL ? 4 : 0;

	for (i = 0; i < 4; i++)
		val[i] = buffer_load_const(ctx, buf,
					   LLVMConstInt(ctx->i32, (offset + i) * 4, 0));
	return ac_build_gather_values(&ctx->ac, val, 4);
}
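
/* Added note: ac_shader_abi callback that maps a gallium
 * VARYING_SLOT_TESS_LEVEL_* id to the corresponding TGSI semantic and loads
 * either the default tess levels (constant state) or the TCS-written levels.
 */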
static LLVMValueRef si_load_tess_level(struct ac_shader_abi *abi,
				       unsigned varying_id,
				       bool load_default_state)
{
	struct si_shader_context *ctx = si_shader_context_from_abi(abi);
	unsigned semantic_name;

	if (load_default_state) {
		switch (varying_id) {
		case VARYING_SLOT_TESS_LEVEL_INNER:
			semantic_name = TGSI_SEMANTIC_TESS_DEFAULT_INNER_LEVEL;
			break;
		case VARYING_SLOT_TESS_LEVEL_OUTER:
			semantic_name = TGSI_SEMANTIC_TESS_DEFAULT_OUTER_LEVEL;
			break;
		default:
			unreachable("unknown tess level");
		}
		return load_tess_level_default(ctx, semantic_name);
	}

	switch (varying_id) {
	case VARYING_SLOT_TESS_LEVEL_INNER:
		semantic_name = TGSI_SEMANTIC_TESSINNER;
		break;
	case VARYING_SLOT_TESS_LEVEL_OUTER:
		semantic_name = TGSI_SEMANTIC_TESSOUTER;
		break;
	default:
		unreachable("unknown tess level");
	}

	return load_tess_level(ctx, semantic_name);
}
static LLVMValueRef si_load_patch_vertices_in(struct ac_shader_abi *abi)
{
	struct si_shader_context *ctx = si_shader_context_from_abi(abi);
	if (ctx->type == PIPE_SHADER_TESS_CTRL)
		return si_unpack_param(ctx, ctx->tcs_out_lds_layout, 13, 6);
	else if (ctx->type == PIPE_SHADER_TESS_EVAL)
		return get_num_tcs_out_vertices(ctx);
	else
		unreachable("invalid shader stage for TGSI_SEMANTIC_VERTICESIN");
}
void si_load_system_value(struct si_shader_context *ctx,
			  unsigned index,
			  const struct tgsi_full_declaration *decl)
{
	LLVMValueRef value = 0;

	assert(index < RADEON_LLVM_MAX_SYSTEM_VALUES);

	switch (decl->Semantic.Name) {
	case TGSI_SEMANTIC_INSTANCEID:
		value = ctx->abi.instance_id;
		break;

	case TGSI_SEMANTIC_VERTEXID:
		value = LLVMBuildAdd(ctx->ac.builder,
				     ctx->abi.vertex_id,
				     ac_get_arg(&ctx->ac, ctx->args.base_vertex), "");
		break;

	case TGSI_SEMANTIC_VERTEXID_NOBASE:
		/* Unused. Clarify the meaning in indexed vs. non-indexed
		 * draws if this is ever used again. */
		assert(false);
		break;

	case TGSI_SEMANTIC_BASEVERTEX:
		value = get_base_vertex(&ctx->abi);
		break;

	case TGSI_SEMANTIC_BASEINSTANCE:
		value = ac_get_arg(&ctx->ac, ctx->args.start_instance);
		break;

	case TGSI_SEMANTIC_DRAWID:
		value = ac_get_arg(&ctx->ac, ctx->args.draw_id);
		break;

	case TGSI_SEMANTIC_INVOCATIONID:
		if (ctx->type == PIPE_SHADER_TESS_CTRL) {
			value = si_unpack_param(ctx, ctx->args.tcs_rel_ids, 8, 5);
		} else if (ctx->type == PIPE_SHADER_GEOMETRY) {
			if (ctx->screen->info.chip_class >= GFX10) {
				value = LLVMBuildAnd(ctx->ac.builder,
						     ac_get_arg(&ctx->ac, ctx->args.gs_invocation_id),
						     LLVMConstInt(ctx->i32, 127, 0), "");
			} else {
				value = ac_get_arg(&ctx->ac, ctx->args.gs_invocation_id);
			}
		} else {
			assert(!"INVOCATIONID not implemented");
		}
		break;
	case TGSI_SEMANTIC_POSITION:
	{
		LLVMValueRef pos[4] = {
			LLVMGetParam(ctx->main_fn, SI_PARAM_POS_X_FLOAT),
			LLVMGetParam(ctx->main_fn, SI_PARAM_POS_Y_FLOAT),
			LLVMGetParam(ctx->main_fn, SI_PARAM_POS_Z_FLOAT),
			ac_build_fdiv(&ctx->ac, ctx->ac.f32_1,
				      LLVMGetParam(ctx->main_fn, SI_PARAM_POS_W_FLOAT)),
		};
		value = ac_build_gather_values(&ctx->ac, pos, 4);
		break;
	}

	case TGSI_SEMANTIC_FACE:
		value = ac_get_arg(&ctx->ac, ctx->args.front_face);
		break;

	case TGSI_SEMANTIC_SAMPLEID:
		value = si_get_sample_id(ctx);
		break;

	case TGSI_SEMANTIC_SAMPLEPOS: {
		LLVMValueRef pos[4] = {
			LLVMGetParam(ctx->main_fn, SI_PARAM_POS_X_FLOAT),
			LLVMGetParam(ctx->main_fn, SI_PARAM_POS_Y_FLOAT),
			LLVMConstReal(ctx->f32, 0),
			LLVMConstReal(ctx->f32, 0)
		};
		pos[0] = ac_build_fract(&ctx->ac, pos[0], 32);
		pos[1] = ac_build_fract(&ctx->ac, pos[1], 32);
		value = ac_build_gather_values(&ctx->ac, pos, 4);
		break;
	}
	case TGSI_SEMANTIC_SAMPLEMASK:
		/* This can only occur with the OpenGL Core profile, which
		 * doesn't support smoothing.
		 */
		value = LLVMGetParam(ctx->main_fn, SI_PARAM_SAMPLE_COVERAGE);
		break;

	case TGSI_SEMANTIC_TESSCOORD:
		value = si_load_tess_coord(&ctx->abi);
		break;

	case TGSI_SEMANTIC_VERTICESIN:
		value = si_load_patch_vertices_in(&ctx->abi);
		break;

	case TGSI_SEMANTIC_TESSINNER:
	case TGSI_SEMANTIC_TESSOUTER:
		value = load_tess_level(ctx, decl->Semantic.Name);
		break;

	case TGSI_SEMANTIC_TESS_DEFAULT_OUTER_LEVEL:
	case TGSI_SEMANTIC_TESS_DEFAULT_INNER_LEVEL:
		value = load_tess_level_default(ctx, decl->Semantic.Name);
		break;

	case TGSI_SEMANTIC_PRIMID:
		value = si_get_primitive_id(ctx, 0);
		break;

	case TGSI_SEMANTIC_GRID_SIZE:
		value = ac_get_arg(&ctx->ac, ctx->args.num_work_groups);
		break;

	case TGSI_SEMANTIC_BLOCK_SIZE:
		value = get_block_size(&ctx->abi);
		break;

	case TGSI_SEMANTIC_BLOCK_ID:
	{
		LLVMValueRef values[3];

		for (int i = 0; i < 3; i++) {
			values[i] = ctx->i32_0;
			if (ctx->args.workgroup_ids[i].used) {
				values[i] = ac_get_arg(&ctx->ac, ctx->args.workgroup_ids[i]);
			}
		}
		value = ac_build_gather_values(&ctx->ac, values, 3);
		break;
	}

	case TGSI_SEMANTIC_THREAD_ID:
		value = ac_get_arg(&ctx->ac, ctx->args.local_invocation_ids);
		break;

	case TGSI_SEMANTIC_HELPER_INVOCATION:
		value = ac_build_load_helper_invocation(&ctx->ac);
		break;

	case TGSI_SEMANTIC_SUBGROUP_SIZE:
		value = LLVMConstInt(ctx->i32, ctx->ac.wave_size, 0);
		break;

	case TGSI_SEMANTIC_SUBGROUP_INVOCATION:
		value = ac_get_thread_id(&ctx->ac);
		break;
	case TGSI_SEMANTIC_SUBGROUP_EQ_MASK:
	{
		LLVMValueRef id = ac_get_thread_id(&ctx->ac);
		if (ctx->ac.wave_size == 64)
			id = LLVMBuildZExt(ctx->ac.builder, id, ctx->i64, "");
		value = LLVMBuildShl(ctx->ac.builder,
				     LLVMConstInt(ctx->ac.iN_wavemask, 1, 0), id, "");
		if (ctx->ac.wave_size == 32)
			value = LLVMBuildZExt(ctx->ac.builder, value, ctx->i64, "");
		value = LLVMBuildBitCast(ctx->ac.builder, value, ctx->v2i32, "");
		break;
	}
	case TGSI_SEMANTIC_SUBGROUP_GE_MASK:
	case TGSI_SEMANTIC_SUBGROUP_GT_MASK:
	case TGSI_SEMANTIC_SUBGROUP_LE_MASK:
	case TGSI_SEMANTIC_SUBGROUP_LT_MASK:
	{
		LLVMValueRef id = ac_get_thread_id(&ctx->ac);
		if (decl->Semantic.Name == TGSI_SEMANTIC_SUBGROUP_GT_MASK ||
		    decl->Semantic.Name == TGSI_SEMANTIC_SUBGROUP_LE_MASK) {
			/* All bits set except LSB */
			value = LLVMConstInt(ctx->ac.iN_wavemask, -2, 0);
		} else {
			/* All bits set */
			value = LLVMConstInt(ctx->ac.iN_wavemask, -1, 0);
		}
		if (ctx->ac.wave_size == 64)
			id = LLVMBuildZExt(ctx->ac.builder, id, ctx->i64, "");
		value = LLVMBuildShl(ctx->ac.builder, value, id, "");
		if (decl->Semantic.Name == TGSI_SEMANTIC_SUBGROUP_LE_MASK ||
		    decl->Semantic.Name == TGSI_SEMANTIC_SUBGROUP_LT_MASK)
			value = LLVMBuildNot(ctx->ac.builder, value, "");
		if (ctx->ac.wave_size == 32)
			value = LLVMBuildZExt(ctx->ac.builder, value, ctx->i64, "");
		value = LLVMBuildBitCast(ctx->ac.builder, value, ctx->v2i32, "");
		break;
	}

	case TGSI_SEMANTIC_CS_USER_DATA_AMD:
		value = ac_get_arg(&ctx->ac, ctx->cs_user_data);
		break;

	default:
		assert(!"unknown system value");
		return;
	}

	ctx->system_values[index] = value;
}
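
/* Added note: declares the LDS array backing compute shared memory, sized by
 * TGSI_PROPERTY_CS_LOCAL_SIZE, and publishes it as ctx->ac.lds.
 */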
void si_declare_compute_memory(struct si_shader_context *ctx)
{
	struct si_shader_selector *sel = ctx->shader->selector;
	unsigned lds_size = sel->info.properties[TGSI_PROPERTY_CS_LOCAL_SIZE];

	LLVMTypeRef i8p = LLVMPointerType(ctx->i8, AC_ADDR_SPACE_LDS);
	LLVMValueRef var;

	assert(!ctx->ac.lds);

	var = LLVMAddGlobalInAddressSpace(ctx->ac.module,
					  LLVMArrayType(ctx->i8, lds_size),
					  "compute_lds",
					  AC_ADDR_SPACE_LDS);
	LLVMSetAlignment(var, 64 * 1024);

	ctx->ac.lds = LLVMBuildBitCast(ctx->ac.builder, var, i8p, "");
}
void si_tgsi_declare_compute_memory(struct si_shader_context *ctx,
				    const struct tgsi_full_declaration *decl)
{
	assert(decl->Declaration.MemType == TGSI_MEMORY_TYPE_SHARED);
	assert(decl->Range.First == decl->Range.Last);

	si_declare_compute_memory(ctx);
}
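
/* Added note: builds a buffer descriptor for constant buffer 0 directly from
 * the user-SGPR pointer, so loads are bounds-checked by the descriptor
 * instead of by manual 64-bit address arithmetic.
 */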
static LLVMValueRef load_const_buffer_desc_fast_path(struct si_shader_context *ctx)
{
	LLVMValueRef ptr =
		ac_get_arg(&ctx->ac, ctx->const_and_shader_buffers);
	struct si_shader_selector *sel = ctx->shader->selector;

	/* Do the bounds checking with a descriptor, because
	 * doing computation and manual bounds checking of 64-bit
	 * addresses generates horrible VALU code with very high
	 * VGPR usage and very low SIMD occupancy.
	 */
	ptr = LLVMBuildPtrToInt(ctx->ac.builder, ptr, ctx->ac.intptr, "");

	LLVMValueRef desc0, desc1;
	desc0 = ptr;
	desc1 = LLVMConstInt(ctx->i32,
			     S_008F04_BASE_ADDRESS_HI(ctx->screen->info.address32_hi), 0);

	uint32_t rsrc3 = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
			 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
			 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
			 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);

	if (ctx->screen->info.chip_class >= GFX10)
		rsrc3 |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
			 S_008F0C_OOB_SELECT(3) |
			 S_008F0C_RESOURCE_LEVEL(1);
	else
		rsrc3 |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
			 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);

	LLVMValueRef desc_elems[] = {
		desc0,
		desc1,
		LLVMConstInt(ctx->i32, (sel->info.const_file_max[0] + 1) * 16, 0),
		LLVMConstInt(ctx->i32, rsrc3, false)
	};

	return ac_build_gather_values(&ctx->ac, desc_elems, 4);
}
static LLVMValueRef load_const_buffer_desc(struct si_shader_context *ctx, int i)
{
	LLVMValueRef list_ptr = ac_get_arg(&ctx->ac,
					   ctx->const_and_shader_buffers);

	return ac_build_load_to_sgpr(&ctx->ac, list_ptr,
				     LLVMConstInt(ctx->i32, si_get_constbuf_slot(i), 0));
}
static LLVMValueRef load_ubo(struct ac_shader_abi *abi, LLVMValueRef index)
{
	struct si_shader_context *ctx = si_shader_context_from_abi(abi);
	struct si_shader_selector *sel = ctx->shader->selector;

	LLVMValueRef ptr = ac_get_arg(&ctx->ac, ctx->const_and_shader_buffers);

	if (sel->info.const_buffers_declared == 1 &&
	    sel->info.shader_buffers_declared == 0) {
		return load_const_buffer_desc_fast_path(ctx);
	}

	index = si_llvm_bound_index(ctx, index, ctx->num_const_buffers);
	index = LLVMBuildAdd(ctx->ac.builder, index,
			     LLVMConstInt(ctx->i32, SI_NUM_SHADER_BUFFERS, 0), "");

	return ac_build_load_to_sgpr(&ctx->ac, ptr, index);
}
static LLVMValueRef
load_ssbo(struct ac_shader_abi *abi, LLVMValueRef index, bool write)
{
	struct si_shader_context *ctx = si_shader_context_from_abi(abi);
	LLVMValueRef rsrc_ptr = ac_get_arg(&ctx->ac,
					   ctx->const_and_shader_buffers);

	index = si_llvm_bound_index(ctx, index, ctx->num_shader_buffers);
	index = LLVMBuildSub(ctx->ac.builder,
			     LLVMConstInt(ctx->i32, SI_NUM_SHADER_BUFFERS - 1, 0),
			     index, "");

	return ac_build_load_to_sgpr(&ctx->ac, rsrc_ptr, index);
}
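
/* Added note: fetches a TGSI CONST register value from a constant buffer,
 * splitting 64-bit types into two 32-bit loads and using the descriptor
 * fast path when only constant buffer 0 is declared.
 */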
static LLVMValueRef fetch_constant(
	struct lp_build_tgsi_context *bld_base,
	const struct tgsi_full_src_register *reg,
	enum tgsi_opcode_type type,
	unsigned swizzle_in)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	struct si_shader_selector *sel = ctx->shader->selector;
	const struct tgsi_ind_register *ireg = &reg->Indirect;
	unsigned buf, idx;
	unsigned swizzle = swizzle_in & 0xffff;

	LLVMValueRef addr, bufp;
	if (swizzle_in == LP_CHAN_ALL) {
		unsigned chan;
		LLVMValueRef values[4];
		for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan)
			values[chan] = fetch_constant(bld_base, reg, type, chan);

		return ac_build_gather_values(&ctx->ac, values, 4);
	}

	/* Split 64-bit loads. */
	if (tgsi_type_is_64bit(type)) {
		LLVMValueRef lo, hi;

		lo = fetch_constant(bld_base, reg, TGSI_TYPE_UNSIGNED, swizzle);
		hi = fetch_constant(bld_base, reg, TGSI_TYPE_UNSIGNED, (swizzle_in >> 16));
		return si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type),
						lo, hi);
	}
	idx = reg->Register.Index * 4 + swizzle;
	if (reg->Register.Indirect) {
		addr = si_get_indirect_index(ctx, ireg, 16, idx * 4);
	} else {
		addr = LLVMConstInt(ctx->i32, idx * 4, 0);
	}

	/* Fast path when user data SGPRs point to constant buffer 0 directly. */
	if (sel->info.const_buffers_declared == 1 &&
	    sel->info.shader_buffers_declared == 0) {
		LLVMValueRef desc = load_const_buffer_desc_fast_path(ctx);
		LLVMValueRef result = buffer_load_const(ctx, desc, addr);
		return bitcast(bld_base, type, result);
	}

	assert(reg->Register.Dimension);
	buf = reg->Dimension.Index;

	if (reg->Dimension.Indirect) {
		LLVMValueRef ptr = ac_get_arg(&ctx->ac, ctx->const_and_shader_buffers);
		LLVMValueRef index;
		index = si_get_bounded_indirect_index(ctx, &reg->DimIndirect,
						      reg->Dimension.Index,
						      ctx->num_const_buffers);
		index = LLVMBuildAdd(ctx->ac.builder, index,
				     LLVMConstInt(ctx->i32, SI_NUM_SHADER_BUFFERS, 0), "");
		bufp = ac_build_load_to_sgpr(&ctx->ac, ptr, index);
	} else
		bufp = load_const_buffer_desc(ctx, buf);

	return bitcast(bld_base, type, buffer_load_const(ctx, bufp, addr));
}
/* Initialize arguments for the shader export intrinsic */
static void si_llvm_init_export_args(struct si_shader_context *ctx,
				     LLVMValueRef *values,
				     unsigned target,
				     struct ac_export_args *args)
{
	LLVMValueRef f32undef = LLVMGetUndef(ctx->ac.f32);
	unsigned spi_shader_col_format = V_028714_SPI_SHADER_32_ABGR;
	unsigned chan;
	bool is_int8, is_int10;

	/* Default is 0xf. Adjusted below depending on the format. */
	args->enabled_channels = 0xf; /* writemask */

	/* Specify whether the EXEC mask represents the valid mask */
	args->valid_mask = 0;

	/* Specify whether this is the last export */
	args->done = 0;

	/* Specify the target we are exporting */
	args->target = target;

	if (ctx->type == PIPE_SHADER_FRAGMENT) {
		const struct si_shader_key *key = &ctx->shader->key;
		unsigned col_formats = key->part.ps.epilog.spi_shader_col_format;
		int cbuf = target - V_008DFC_SQ_EXP_MRT;

		assert(cbuf >= 0 && cbuf < 8);
		spi_shader_col_format = (col_formats >> (cbuf * 4)) & 0xf;
		is_int8 = (key->part.ps.epilog.color_is_int8 >> cbuf) & 0x1;
		is_int10 = (key->part.ps.epilog.color_is_int10 >> cbuf) & 0x1;
	}

	args->compr = false;
	args->out[0] = f32undef;
	args->out[1] = f32undef;
	args->out[2] = f32undef;
	args->out[3] = f32undef;

	LLVMValueRef (*packf)(struct ac_llvm_context *ctx, LLVMValueRef args[2]) = NULL;
	LLVMValueRef (*packi)(struct ac_llvm_context *ctx, LLVMValueRef args[2],
			      unsigned bits, bool hi) = NULL;
	switch (spi_shader_col_format) {
	case V_028714_SPI_SHADER_ZERO:
		args->enabled_channels = 0; /* writemask */
		args->target = V_008DFC_SQ_EXP_NULL;
		break;

	case V_028714_SPI_SHADER_32_R:
		args->enabled_channels = 1; /* writemask */
		args->out[0] = values[0];
		break;

	case V_028714_SPI_SHADER_32_GR:
		args->enabled_channels = 0x3; /* writemask */
		args->out[0] = values[0];
		args->out[1] = values[1];
		break;

	case V_028714_SPI_SHADER_32_AR:
		if (ctx->screen->info.chip_class >= GFX10) {
			args->enabled_channels = 0x3; /* writemask */
			args->out[0] = values[0];
			args->out[1] = values[3];
		} else {
			args->enabled_channels = 0x9; /* writemask */
			args->out[0] = values[0];
			args->out[3] = values[3];
		}
		break;

	case V_028714_SPI_SHADER_FP16_ABGR:
		packf = ac_build_cvt_pkrtz_f16;
		break;

	case V_028714_SPI_SHADER_UNORM16_ABGR:
		packf = ac_build_cvt_pknorm_u16;
		break;

	case V_028714_SPI_SHADER_SNORM16_ABGR:
		packf = ac_build_cvt_pknorm_i16;
		break;

	case V_028714_SPI_SHADER_UINT16_ABGR:
		packi = ac_build_cvt_pk_u16;
		break;

	case V_028714_SPI_SHADER_SINT16_ABGR:
		packi = ac_build_cvt_pk_i16;
		break;

	case V_028714_SPI_SHADER_32_ABGR:
		memcpy(&args->out[0], values, sizeof(values[0]) * 4);
		break;
	}
	/* Pack f16 or norm_i16/u16. */
	if (packf) {
		for (chan = 0; chan < 2; chan++) {
			LLVMValueRef pack_args[2] = {
				values[2 * chan],
				values[2 * chan + 1]
			};
			LLVMValueRef packed;

			packed = packf(&ctx->ac, pack_args);
			args->out[chan] = ac_to_float(&ctx->ac, packed);
		}
		args->compr = 1; /* COMPR flag */
	}
	/* Pack i16/u16. */
	if (packi) {
		for (chan = 0; chan < 2; chan++) {
			LLVMValueRef pack_args[2] = {
				ac_to_integer(&ctx->ac, values[2 * chan]),
				ac_to_integer(&ctx->ac, values[2 * chan + 1])
			};
			LLVMValueRef packed;

			packed = packi(&ctx->ac, pack_args,
				       is_int8 ? 8 : is_int10 ? 10 : 16,
				       chan == 1);
			args->out[chan] = ac_to_float(&ctx->ac, packed);
		}
		args->compr = 1; /* COMPR flag */
	}
}
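
/* Added note: emits the fragment alpha test, killing the pixel when the
 * comparison against SI_PARAM_ALPHA_REF fails, or unconditionally for
 * PIPE_FUNC_NEVER.
 */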
static void si_alpha_test(struct lp_build_tgsi_context *bld_base,
			  LLVMValueRef alpha)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);

	if (ctx->shader->key.part.ps.epilog.alpha_func != PIPE_FUNC_NEVER) {
		static LLVMRealPredicate cond_map[PIPE_FUNC_ALWAYS + 1] = {
			[PIPE_FUNC_LESS] = LLVMRealOLT,
			[PIPE_FUNC_EQUAL] = LLVMRealOEQ,
			[PIPE_FUNC_LEQUAL] = LLVMRealOLE,
			[PIPE_FUNC_GREATER] = LLVMRealOGT,
			[PIPE_FUNC_NOTEQUAL] = LLVMRealONE,
			[PIPE_FUNC_GEQUAL] = LLVMRealOGE,
		};
		LLVMRealPredicate cond = cond_map[ctx->shader->key.part.ps.epilog.alpha_func];
		assert(cond);

		LLVMValueRef alpha_ref = LLVMGetParam(ctx->main_fn,
						      SI_PARAM_ALPHA_REF);
		LLVMValueRef alpha_pass =
			LLVMBuildFCmp(ctx->ac.builder, cond, alpha, alpha_ref, "");
		ac_build_kill_if_false(&ctx->ac, alpha_pass);
	} else {
		ac_build_kill_if_false(&ctx->ac, ctx->i1false);
	}
}
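
/* Added note: scales alpha by the fraction of covered samples (see the
 * formula comment below); the SI_NUM_SMOOTH_AA_SAMPLES divisor suggests this
 * is used by the line/polygon smoothing path.
 */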
static LLVMValueRef si_scale_alpha_by_sample_mask(struct lp_build_tgsi_context *bld_base,
						  LLVMValueRef alpha,
						  unsigned samplemask_param)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	LLVMValueRef coverage;

	/* alpha = alpha * popcount(coverage) / SI_NUM_SMOOTH_AA_SAMPLES */
	coverage = LLVMGetParam(ctx->main_fn,
				samplemask_param);
	coverage = ac_to_integer(&ctx->ac, coverage);

	coverage = ac_build_intrinsic(&ctx->ac, "llvm.ctpop.i32",
				      ctx->i32,
				      &coverage, 1, AC_FUNC_ATTR_READNONE);

	coverage = LLVMBuildUIToFP(ctx->ac.builder, coverage,
				   ctx->f32, "");

	coverage = LLVMBuildFMul(ctx->ac.builder, coverage,
				 LLVMConstReal(ctx->f32,
					       1.0 / SI_NUM_SMOOTH_AA_SAMPLES), "");

	return LLVMBuildFMul(ctx->ac.builder, alpha, coverage, "");
}
static void si_llvm_emit_clipvertex(struct si_shader_context *ctx,
				    struct ac_export_args *pos, LLVMValueRef *out_elts)
{
	unsigned reg_index;
	unsigned chan;
	unsigned const_chan;
	LLVMValueRef base_elt;
	LLVMValueRef ptr = ac_get_arg(&ctx->ac, ctx->rw_buffers);
	LLVMValueRef constbuf_index = LLVMConstInt(ctx->i32,
						   SI_VS_CONST_CLIP_PLANES, 0);
	LLVMValueRef const_resource = ac_build_load_to_sgpr(&ctx->ac, ptr, constbuf_index);

	for (reg_index = 0; reg_index < 2; reg_index++) {
		struct ac_export_args *args = &pos[2 + reg_index];

		args->out[0] =
		args->out[1] =
		args->out[2] =
		args->out[3] = LLVMConstReal(ctx->f32, 0.0f);

		/* Compute dot products of position and user clip plane vectors */
		for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
			for (const_chan = 0; const_chan < TGSI_NUM_CHANNELS; const_chan++) {
				LLVMValueRef addr =
					LLVMConstInt(ctx->i32, ((reg_index * 4 + chan) * 4 +
								const_chan) * 4, 0);
				base_elt = buffer_load_const(ctx, const_resource,
							     addr);
				args->out[chan] = ac_build_fmad(&ctx->ac, base_elt,
								out_elts[const_chan], args->out[chan]);
			}
		}

		args->enabled_channels = 0xf;
		args->valid_mask = 0;
		args->done = 0;
		args->target = V_008DFC_SQ_EXP_POS + 2 + reg_index;
		args->compr = 0;
	}
}
static void si_dump_streamout(struct pipe_stream_output_info *so)
{
	unsigned i;

	if (so->num_outputs)
		fprintf(stderr, "STREAMOUT\n");

	for (i = 0; i < so->num_outputs; i++) {
		unsigned mask = ((1 << so->output[i].num_components) - 1) <<
				so->output[i].start_component;
		fprintf(stderr, "  %i: BUF%i[%i..%i] <- OUT[%i].%s%s%s%s\n",
			i, so->output[i].output_buffer,
			so->output[i].dst_offset, so->output[i].dst_offset + so->output[i].num_components - 1,
			so->output[i].register_index,
			mask & 1 ? "x" : "",
			mask & 2 ? "y" : "",
			mask & 4 ? "z" : "",
			mask & 8 ? "w" : "");
	}
}
void si_emit_streamout_output(struct si_shader_context *ctx,
			      LLVMValueRef const *so_buffers,
			      LLVMValueRef const *so_write_offsets,
			      struct pipe_stream_output *stream_out,
			      struct si_shader_output_values *shader_out)
{
	unsigned buf_idx = stream_out->output_buffer;
	unsigned start = stream_out->start_component;
	unsigned num_comps = stream_out->num_components;
	LLVMValueRef out[4];

	assert(num_comps && num_comps <= 4);
	if (!num_comps || num_comps > 4)
		return;

	/* Load the output as int. */
	for (int j = 0; j < num_comps; j++) {
		assert(stream_out->stream == shader_out->vertex_stream[start + j]);

		out[j] = ac_to_integer(&ctx->ac, shader_out->values[start + j]);
	}

	/* Pack the output. */
	LLVMValueRef vdata = NULL;

	switch (num_comps) {
	case 1: /* as i32 */
		vdata = out[0];
		break;
	case 2: /* as v2i32 */
	case 3: /* as v3i32 */
		if (ac_has_vec3_support(ctx->screen->info.chip_class, false)) {
			vdata = ac_build_gather_values(&ctx->ac, out, num_comps);
			break;
		}
		/* as v4i32 (aligned to 4) */
		out[3] = LLVMGetUndef(ctx->i32);
		/* fall through */
	case 4: /* as v4i32 */
		vdata = ac_build_gather_values(&ctx->ac, out, util_next_power_of_two(num_comps));
		break;
	}

	ac_build_buffer_store_dword(&ctx->ac, so_buffers[buf_idx],
				    vdata, num_comps,
				    so_write_offsets[buf_idx],
				    ctx->i32_0,
				    stream_out->dst_offset * 4, ac_glc | ac_slc, false);
}
/**
 * Write streamout data to buffers for vertex stream @p stream (different
 * vertex streams can occur for GS copy shaders).
 */
static void si_llvm_emit_streamout(struct si_shader_context *ctx,
				   struct si_shader_output_values *outputs,
				   unsigned noutput, unsigned stream)
{
	struct si_shader_selector *sel = ctx->shader->selector;
	struct pipe_stream_output_info *so = &sel->so;
	LLVMBuilderRef builder = ctx->ac.builder;
	int i;

	/* Get bits [22:16], i.e. (so_param >> 16) & 127; */
	LLVMValueRef so_vtx_count =
		si_unpack_param(ctx, ctx->streamout_config, 16, 7);

	LLVMValueRef tid = ac_get_thread_id(&ctx->ac);

	/* can_emit = tid < so_vtx_count; */
	LLVMValueRef can_emit =
		LLVMBuildICmp(builder, LLVMIntULT, tid, so_vtx_count, "");

	/* Emit the streamout code conditionally. This actually avoids
	 * out-of-bounds buffer access. The hw tells us via the SGPR
	 * (so_vtx_count) which threads are allowed to emit streamout data. */
	ac_build_ifcc(&ctx->ac, can_emit, 6501);
	{
		/* The buffer offset is computed as follows:
		 *   ByteOffset = streamout_offset[buffer_id]*4 +
		 *                (streamout_write_index + thread_id)*stride[buffer_id] +
		 *                attrib_offset
		 */

		LLVMValueRef so_write_index =
			ac_get_arg(&ctx->ac,
				   ctx->streamout_write_index);

		/* Compute (streamout_write_index + thread_id). */
		so_write_index = LLVMBuildAdd(builder, so_write_index, tid, "");

		/* Load the descriptor and compute the write offset for each
		 * enabled buffer. */
		LLVMValueRef so_write_offset[4] = {};
		LLVMValueRef so_buffers[4];
		LLVMValueRef buf_ptr = ac_get_arg(&ctx->ac,
						  ctx->rw_buffers);

		for (i = 0; i < 4; i++) {
			if (!so->stride[i])
				continue;

			LLVMValueRef offset = LLVMConstInt(ctx->i32,
							   SI_VS_STREAMOUT_BUF0 + i, 0);

			so_buffers[i] = ac_build_load_to_sgpr(&ctx->ac, buf_ptr, offset);

			LLVMValueRef so_offset = ac_get_arg(&ctx->ac,
							    ctx->streamout_offset[i]);
			so_offset = LLVMBuildMul(builder, so_offset, LLVMConstInt(ctx->i32, 4, 0), "");

			so_write_offset[i] = ac_build_imad(&ctx->ac, so_write_index,
							   LLVMConstInt(ctx->i32, so->stride[i]*4, 0),
							   so_offset);
		}

		/* Write streamout data. */
		for (i = 0; i < so->num_outputs; i++) {
			unsigned reg = so->output[i].register_index;

			if (reg >= noutput)
				continue;

			if (stream != so->output[i].stream)
				continue;

			si_emit_streamout_output(ctx, so_buffers, so_write_offset,
						 &so->output[i], &outputs[reg]);
		}
	}
	ac_build_endif(&ctx->ac, 6501);
}
static void si_export_param(struct si_shader_context *ctx, unsigned index,
			    LLVMValueRef *values)
{
	struct ac_export_args args;

	si_llvm_init_export_args(ctx, values,
				 V_008DFC_SQ_EXP_PARAM + index, &args);
	ac_build_export(&ctx->ac, &args);
}
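
/* Added note: emits PARAM exports for the VS outputs that need them
 * (skipping outputs killed via key.opt.kill_outputs) and records their
 * export slots in vs_output_param_offset / nr_param_exports.
 */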
static void si_build_param_exports(struct si_shader_context *ctx,
				   struct si_shader_output_values *outputs,
				   unsigned noutput)
{
	struct si_shader *shader = ctx->shader;
	unsigned param_count = 0;

	for (unsigned i = 0; i < noutput; i++) {
		unsigned semantic_name = outputs[i].semantic_name;
		unsigned semantic_index = outputs[i].semantic_index;

		if (outputs[i].vertex_stream[0] != 0 &&
		    outputs[i].vertex_stream[1] != 0 &&
		    outputs[i].vertex_stream[2] != 0 &&
		    outputs[i].vertex_stream[3] != 0)
			continue;

		switch (semantic_name) {
		case TGSI_SEMANTIC_LAYER:
		case TGSI_SEMANTIC_VIEWPORT_INDEX:
		case TGSI_SEMANTIC_CLIPDIST:
		case TGSI_SEMANTIC_COLOR:
		case TGSI_SEMANTIC_BCOLOR:
		case TGSI_SEMANTIC_PRIMID:
		case TGSI_SEMANTIC_FOG:
		case TGSI_SEMANTIC_TEXCOORD:
		case TGSI_SEMANTIC_GENERIC:
			break;
		default:
			continue;
		}

		if ((semantic_name != TGSI_SEMANTIC_GENERIC ||
		     semantic_index < SI_MAX_IO_GENERIC) &&
		    shader->key.opt.kill_outputs &
		    (1ull << si_shader_io_get_unique_index(semantic_name,
							   semantic_index, true)))
			continue;

		si_export_param(ctx, param_count, outputs[i].values);

		assert(i < ARRAY_SIZE(shader->info.vs_output_param_offset));
		shader->info.vs_output_param_offset[i] = param_count++;
	}

	shader->info.nr_param_exports = param_count;
}
/**
 * Vertex color clamping.
 *
 * This uses a state constant loaded from a user data SGPR; an IF statement
 * is added that clamps all colors if the constant is true.
 */
static void si_vertex_color_clamping(struct si_shader_context *ctx,
				     struct si_shader_output_values *outputs,
				     unsigned noutput)
{
	LLVMValueRef addr[SI_MAX_VS_OUTPUTS][4];
	bool has_colors = false;

	/* Store original colors to alloca variables. */
	for (unsigned i = 0; i < noutput; i++) {
		if (outputs[i].semantic_name != TGSI_SEMANTIC_COLOR &&
		    outputs[i].semantic_name != TGSI_SEMANTIC_BCOLOR)
			continue;

		for (unsigned j = 0; j < 4; j++) {
			addr[i][j] = ac_build_alloca_undef(&ctx->ac, ctx->f32, "");
			LLVMBuildStore(ctx->ac.builder, outputs[i].values[j], addr[i][j]);
		}
		has_colors = true;
	}

	if (!has_colors)
		return;

	/* The state is in the first bit of the user SGPR. */
	LLVMValueRef cond = ac_get_arg(&ctx->ac, ctx->vs_state_bits);
	cond = LLVMBuildTrunc(ctx->ac.builder, cond, ctx->i1, "");

	ac_build_ifcc(&ctx->ac, cond, 6502);

	/* Store clamped colors to alloca variables within the conditional block. */
	for (unsigned i = 0; i < noutput; i++) {
		if (outputs[i].semantic_name != TGSI_SEMANTIC_COLOR &&
		    outputs[i].semantic_name != TGSI_SEMANTIC_BCOLOR)
			continue;

		for (unsigned j = 0; j < 4; j++) {
			LLVMBuildStore(ctx->ac.builder,
				       ac_build_clamp(&ctx->ac, outputs[i].values[j]),
				       addr[i][j]);
		}
	}
	ac_build_endif(&ctx->ac, 6502);

	/* Load clamped colors */
	for (unsigned i = 0; i < noutput; i++) {
		if (outputs[i].semantic_name != TGSI_SEMANTIC_COLOR &&
		    outputs[i].semantic_name != TGSI_SEMANTIC_BCOLOR)
			continue;

		for (unsigned j = 0; j < 4; j++) {
			outputs[i].values[j] =
				LLVMBuildLoad(ctx->ac.builder, addr[i][j], "");
		}
	}
}
/* Generate export instructions for hardware VS shader stage or NGG GS stage
 * (position and parameter data only).
 */
void si_llvm_export_vs(struct si_shader_context *ctx,
		       struct si_shader_output_values *outputs,
		       unsigned noutput)
{
	struct si_shader *shader = ctx->shader;
	struct ac_export_args pos_args[4] = {};
	LLVMValueRef psize_value = NULL, edgeflag_value = NULL, layer_value = NULL, viewport_index_value = NULL;
	unsigned pos_idx;
	int i;

	si_vertex_color_clamping(ctx, outputs, noutput);

	/* Build position exports. */
	for (i = 0; i < noutput; i++) {
		switch (outputs[i].semantic_name) {
		case TGSI_SEMANTIC_POSITION:
			si_llvm_init_export_args(ctx, outputs[i].values,
						 V_008DFC_SQ_EXP_POS, &pos_args[0]);
			break;
		case TGSI_SEMANTIC_PSIZE:
			psize_value = outputs[i].values[0];
			break;
		case TGSI_SEMANTIC_LAYER:
			layer_value = outputs[i].values[0];
			break;
		case TGSI_SEMANTIC_VIEWPORT_INDEX:
			viewport_index_value = outputs[i].values[0];
			break;
		case TGSI_SEMANTIC_EDGEFLAG:
			edgeflag_value = outputs[i].values[0];
			break;
		case TGSI_SEMANTIC_CLIPDIST:
			if (!shader->key.opt.clip_disable) {
				unsigned index = 2 + outputs[i].semantic_index;