2 * Copyright 2017 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
25 #include "si_shader_internal.h"
29 #include "util/u_memory.h"
31 static LLVMValueRef
get_wave_id_in_tg(struct si_shader_context
*ctx
)
33 return si_unpack_param(ctx
, ctx
->param_merged_wave_info
, 24, 4);
36 static LLVMValueRef
ngg_get_vtx_cnt(struct si_shader_context
*ctx
)
38 return ac_build_bfe(&ctx
->ac
, ctx
->gs_tg_info
,
39 LLVMConstInt(ctx
->ac
.i32
, 12, false),
40 LLVMConstInt(ctx
->ac
.i32
, 9, false),
44 static LLVMValueRef
ngg_get_prim_cnt(struct si_shader_context
*ctx
)
46 return ac_build_bfe(&ctx
->ac
, ctx
->gs_tg_info
,
47 LLVMConstInt(ctx
->ac
.i32
, 22, false),
48 LLVMConstInt(ctx
->ac
.i32
, 9, false),
52 /* Send GS Alloc Req message from the first wave of the group to SPI.
54 * - bits 0..10: vertices in group
55 * - bits 12..22: primitives in group
57 static void build_sendmsg_gs_alloc_req(struct si_shader_context
*ctx
,
59 LLVMValueRef prim_cnt
)
61 LLVMBuilderRef builder
= ctx
->ac
.builder
;
64 tmp
= LLVMBuildICmp(builder
, LLVMIntEQ
, get_wave_id_in_tg(ctx
), ctx
->ac
.i32_0
, "");
65 ac_build_ifcc(&ctx
->ac
, tmp
, 5020);
67 tmp
= LLVMBuildShl(builder
, prim_cnt
, LLVMConstInt(ctx
->ac
.i32
, 12, false),"");
68 tmp
= LLVMBuildOr(builder
, tmp
, vtx_cnt
, "");
69 ac_build_sendmsg(&ctx
->ac
, AC_SENDMSG_GS_ALLOC_REQ
, tmp
);
71 ac_build_endif(&ctx
->ac
, 5020);
75 unsigned num_vertices
;
77 LLVMValueRef index
[3];
78 LLVMValueRef edgeflag
[3];
81 static void build_export_prim(struct si_shader_context
*ctx
,
82 const struct ngg_prim
*prim
)
84 LLVMBuilderRef builder
= ctx
->ac
.builder
;
85 struct ac_export_args args
;
88 tmp
= LLVMBuildZExt(builder
, prim
->isnull
, ctx
->ac
.i32
, "");
89 args
.out
[0] = LLVMBuildShl(builder
, tmp
, LLVMConstInt(ctx
->ac
.i32
, 31, false), "");
91 for (unsigned i
= 0; i
< prim
->num_vertices
; ++i
) {
92 tmp
= LLVMBuildShl(builder
, prim
->index
[i
],
93 LLVMConstInt(ctx
->ac
.i32
, 10 * i
, false), "");
94 args
.out
[0] = LLVMBuildOr(builder
, args
.out
[0], tmp
, "");
95 tmp
= LLVMBuildZExt(builder
, prim
->edgeflag
[i
], ctx
->ac
.i32
, "");
96 tmp
= LLVMBuildShl(builder
, tmp
,
97 LLVMConstInt(ctx
->ac
.i32
, 10 * i
+ 9, false), "");
98 args
.out
[0] = LLVMBuildOr(builder
, args
.out
[0], tmp
, "");
101 args
.out
[0] = LLVMBuildBitCast(builder
, args
.out
[0], ctx
->ac
.f32
, "");
102 args
.out
[1] = LLVMGetUndef(ctx
->ac
.f32
);
103 args
.out
[2] = LLVMGetUndef(ctx
->ac
.f32
);
104 args
.out
[3] = LLVMGetUndef(ctx
->ac
.f32
);
106 args
.target
= V_008DFC_SQ_EXP_PRIM
;
107 args
.enabled_channels
= 1;
109 args
.valid_mask
= false;
112 ac_build_export(&ctx
->ac
, &args
);
/**
 * Emit the epilogue of an API VS or TES shader compiled as ESGS shader.
 */
118 void gfx10_emit_ngg_epilogue(struct ac_shader_abi
*abi
,
119 unsigned max_outputs
,
122 struct si_shader_context
*ctx
= si_shader_context_from_abi(abi
);
123 struct tgsi_shader_info
*info
= &ctx
->shader
->selector
->info
;
124 struct si_shader_output_values
*outputs
= NULL
;
125 LLVMBuilderRef builder
= ctx
->ac
.builder
;
126 struct lp_build_if_state if_state
;
129 assert(!ctx
->shader
->is_gs_copy_shader
);
130 assert(info
->num_outputs
<= max_outputs
);
132 outputs
= MALLOC((info
->num_outputs
+ 1) * sizeof(outputs
[0]));
134 for (unsigned i
= 0; i
< info
->num_outputs
; i
++) {
135 outputs
[i
].semantic_name
= info
->output_semantic_name
[i
];
136 outputs
[i
].semantic_index
= info
->output_semantic_index
[i
];
138 /* This is used only by streamout. */
139 for (unsigned j
= 0; j
< 4; j
++) {
140 outputs
[i
].values
[j
] =
141 LLVMBuildLoad(builder
,
144 outputs
[i
].vertex_stream
[j
] =
145 (info
->output_streams
[i
] >> (2 * j
)) & 3;
149 lp_build_endif(&ctx
->merged_wrap_if_state
);
151 LLVMValueRef prims_in_wave
= si_unpack_param(ctx
, ctx
->param_merged_wave_info
, 8, 8);
152 LLVMValueRef vtx_in_wave
= si_unpack_param(ctx
, ctx
->param_merged_wave_info
, 0, 8);
153 LLVMValueRef is_gs_thread
= LLVMBuildICmp(builder
, LLVMIntULT
,
154 ac_get_thread_id(&ctx
->ac
), prims_in_wave
, "");
155 LLVMValueRef is_es_thread
= LLVMBuildICmp(builder
, LLVMIntULT
,
156 ac_get_thread_id(&ctx
->ac
), vtx_in_wave
, "");
157 LLVMValueRef vtxindex
[] = {
158 si_unpack_param(ctx
, ctx
->param_gs_vtx01_offset
, 0, 16),
159 si_unpack_param(ctx
, ctx
->param_gs_vtx01_offset
, 16, 16),
160 si_unpack_param(ctx
, ctx
->param_gs_vtx23_offset
, 0, 16),
163 /* Determine the number of vertices per primitive. */
164 unsigned num_vertices
;
165 LLVMValueRef num_vertices_val
;
167 if (ctx
->type
== PIPE_SHADER_VERTEX
) {
168 if (info
->properties
[TGSI_PROPERTY_VS_BLIT_SGPRS
]) {
169 /* Blits always use axis-aligned rectangles with 3 vertices. */
171 num_vertices_val
= LLVMConstInt(ctx
->i32
, 3, 0);
173 /* Extract OUTPRIM field. */
174 tmp
= si_unpack_param(ctx
, ctx
->param_vs_state_bits
, 2, 2);
175 num_vertices_val
= LLVMBuildAdd(builder
, tmp
, ctx
->i32_1
, "");
176 num_vertices
= 3; /* TODO: optimize for points & lines */
179 assert(ctx
->type
== PIPE_SHADER_TESS_EVAL
);
181 if (info
->properties
[TGSI_PROPERTY_TES_POINT_MODE
])
183 else if (info
->properties
[TGSI_PROPERTY_TES_PRIM_MODE
] == PIPE_PRIM_LINES
)
188 num_vertices_val
= LLVMConstInt(ctx
->i32
, num_vertices
, false);
191 /* TODO: streamout */
193 /* TODO: primitive culling */
195 build_sendmsg_gs_alloc_req(ctx
, ngg_get_vtx_cnt(ctx
), ngg_get_prim_cnt(ctx
));
197 /* Export primitive data to the index buffer. Format is:
198 * - bits 0..8: index 0
199 * - bit 9: edge flag 0
200 * - bits 10..18: index 1
201 * - bit 19: edge flag 1
202 * - bits 20..28: index 2
203 * - bit 29: edge flag 2
204 * - bit 31: null primitive (skip)
206 * For the first version, we will always build up all three indices
207 * independent of the primitive type. The additional garbage data
210 * TODO: culling depends on the primitive type, so can have some
213 lp_build_if(&if_state
, &ctx
->gallivm
, is_gs_thread
);
215 struct ngg_prim prim
= {};
217 prim
.num_vertices
= num_vertices
;
218 prim
.isnull
= ctx
->ac
.i1false
;
219 memcpy(prim
.index
, vtxindex
, sizeof(vtxindex
[0]) * 3);
221 for (unsigned i
= 0; i
< num_vertices
; ++i
) {
222 tmp
= LLVMBuildLShr(builder
, ctx
->abi
.gs_invocation_id
,
223 LLVMConstInt(ctx
->ac
.i32
, 8 + i
, false), "");
224 prim
.edgeflag
[i
] = LLVMBuildTrunc(builder
, tmp
, ctx
->ac
.i1
, "");
227 build_export_prim(ctx
, &prim
);
229 lp_build_endif(&if_state
);
231 /* Export per-vertex data (positions and parameters). */
232 lp_build_if(&if_state
, &ctx
->gallivm
, is_es_thread
);
236 /* Unconditionally (re-)load the values for proper SSA form. */
237 for (i
= 0; i
< info
->num_outputs
; i
++) {
238 for (unsigned j
= 0; j
< 4; j
++) {
239 outputs
[i
].values
[j
] =
240 LLVMBuildLoad(builder
,
246 /* TODO: Vertex shaders have to get PrimitiveID from GS VGPRs. */
247 if (ctx
->type
== PIPE_SHADER_TESS_EVAL
&&
248 ctx
->shader
->key
.mono
.u
.vs_export_prim_id
) {
249 outputs
[i
].semantic_name
= TGSI_SEMANTIC_PRIMID
;
250 outputs
[i
].semantic_index
= 0;
251 outputs
[i
].values
[0] = ac_to_float(&ctx
->ac
, si_get_primitive_id(ctx
, 0));
252 for (unsigned j
= 1; j
< 4; j
++)
253 outputs
[i
].values
[j
] = LLVMGetUndef(ctx
->f32
);
255 memset(outputs
[i
].vertex_stream
, 0,
256 sizeof(outputs
[i
].vertex_stream
));
260 si_llvm_export_vs(ctx
, outputs
, i
);
262 lp_build_endif(&if_state
);