radeonsi/gfx10: generate VS and TES as NGG merged ESGS shaders
[mesa.git] / src / gallium / drivers / radeonsi / gfx10_shader_ngg.c
1 /*
2 * Copyright 2017 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23
24 #include "si_pipe.h"
25 #include "si_shader_internal.h"
26
27 #include "sid.h"
28
29 #include "util/u_memory.h"
30
31 static LLVMValueRef get_wave_id_in_tg(struct si_shader_context *ctx)
32 {
33 return si_unpack_param(ctx, ctx->param_merged_wave_info, 24, 4);
34 }
35
36 static LLVMValueRef ngg_get_vtx_cnt(struct si_shader_context *ctx)
37 {
38 return ac_build_bfe(&ctx->ac, ctx->gs_tg_info,
39 LLVMConstInt(ctx->ac.i32, 12, false),
40 LLVMConstInt(ctx->ac.i32, 9, false),
41 false);
42 }
43
44 static LLVMValueRef ngg_get_prim_cnt(struct si_shader_context *ctx)
45 {
46 return ac_build_bfe(&ctx->ac, ctx->gs_tg_info,
47 LLVMConstInt(ctx->ac.i32, 22, false),
48 LLVMConstInt(ctx->ac.i32, 9, false),
49 false);
50 }
51
52 /* Send GS Alloc Req message from the first wave of the group to SPI.
53 * Message payload is:
54 * - bits 0..10: vertices in group
55 * - bits 12..22: primitives in group
56 */
57 static void build_sendmsg_gs_alloc_req(struct si_shader_context *ctx,
58 LLVMValueRef vtx_cnt,
59 LLVMValueRef prim_cnt)
60 {
61 LLVMBuilderRef builder = ctx->ac.builder;
62 LLVMValueRef tmp;
63
64 tmp = LLVMBuildICmp(builder, LLVMIntEQ, get_wave_id_in_tg(ctx), ctx->ac.i32_0, "");
65 ac_build_ifcc(&ctx->ac, tmp, 5020);
66
67 tmp = LLVMBuildShl(builder, prim_cnt, LLVMConstInt(ctx->ac.i32, 12, false),"");
68 tmp = LLVMBuildOr(builder, tmp, vtx_cnt, "");
69 ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_ALLOC_REQ, tmp);
70
71 ac_build_endif(&ctx->ac, 5020);
72 }
73
74 struct ngg_prim {
75 unsigned num_vertices;
76 LLVMValueRef isnull;
77 LLVMValueRef index[3];
78 LLVMValueRef edgeflag[3];
79 };
80
81 static void build_export_prim(struct si_shader_context *ctx,
82 const struct ngg_prim *prim)
83 {
84 LLVMBuilderRef builder = ctx->ac.builder;
85 struct ac_export_args args;
86 LLVMValueRef tmp;
87
88 tmp = LLVMBuildZExt(builder, prim->isnull, ctx->ac.i32, "");
89 args.out[0] = LLVMBuildShl(builder, tmp, LLVMConstInt(ctx->ac.i32, 31, false), "");
90
91 for (unsigned i = 0; i < prim->num_vertices; ++i) {
92 tmp = LLVMBuildShl(builder, prim->index[i],
93 LLVMConstInt(ctx->ac.i32, 10 * i, false), "");
94 args.out[0] = LLVMBuildOr(builder, args.out[0], tmp, "");
95 tmp = LLVMBuildZExt(builder, prim->edgeflag[i], ctx->ac.i32, "");
96 tmp = LLVMBuildShl(builder, tmp,
97 LLVMConstInt(ctx->ac.i32, 10 * i + 9, false), "");
98 args.out[0] = LLVMBuildOr(builder, args.out[0], tmp, "");
99 }
100
101 args.out[0] = LLVMBuildBitCast(builder, args.out[0], ctx->ac.f32, "");
102 args.out[1] = LLVMGetUndef(ctx->ac.f32);
103 args.out[2] = LLVMGetUndef(ctx->ac.f32);
104 args.out[3] = LLVMGetUndef(ctx->ac.f32);
105
106 args.target = V_008DFC_SQ_EXP_PRIM;
107 args.enabled_channels = 1;
108 args.done = true;
109 args.valid_mask = false;
110 args.compr = false;
111
112 ac_build_export(&ctx->ac, &args);
113 }
114
115 /**
116 * Emit the epilogue of an API VS or TES shader compiled as ESGS shader.
117 */
118 void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi,
119 unsigned max_outputs,
120 LLVMValueRef *addrs)
121 {
122 struct si_shader_context *ctx = si_shader_context_from_abi(abi);
123 struct tgsi_shader_info *info = &ctx->shader->selector->info;
124 struct si_shader_output_values *outputs = NULL;
125 LLVMBuilderRef builder = ctx->ac.builder;
126 struct lp_build_if_state if_state;
127 LLVMValueRef tmp;
128
129 assert(!ctx->shader->is_gs_copy_shader);
130 assert(info->num_outputs <= max_outputs);
131
132 outputs = MALLOC((info->num_outputs + 1) * sizeof(outputs[0]));
133
134 for (unsigned i = 0; i < info->num_outputs; i++) {
135 outputs[i].semantic_name = info->output_semantic_name[i];
136 outputs[i].semantic_index = info->output_semantic_index[i];
137
138 /* This is used only by streamout. */
139 for (unsigned j = 0; j < 4; j++) {
140 outputs[i].values[j] =
141 LLVMBuildLoad(builder,
142 addrs[4 * i + j],
143 "");
144 outputs[i].vertex_stream[j] =
145 (info->output_streams[i] >> (2 * j)) & 3;
146 }
147 }
148
149 lp_build_endif(&ctx->merged_wrap_if_state);
150
151 LLVMValueRef prims_in_wave = si_unpack_param(ctx, ctx->param_merged_wave_info, 8, 8);
152 LLVMValueRef vtx_in_wave = si_unpack_param(ctx, ctx->param_merged_wave_info, 0, 8);
153 LLVMValueRef is_gs_thread = LLVMBuildICmp(builder, LLVMIntULT,
154 ac_get_thread_id(&ctx->ac), prims_in_wave, "");
155 LLVMValueRef is_es_thread = LLVMBuildICmp(builder, LLVMIntULT,
156 ac_get_thread_id(&ctx->ac), vtx_in_wave, "");
157 LLVMValueRef vtxindex[] = {
158 si_unpack_param(ctx, ctx->param_gs_vtx01_offset, 0, 16),
159 si_unpack_param(ctx, ctx->param_gs_vtx01_offset, 16, 16),
160 si_unpack_param(ctx, ctx->param_gs_vtx23_offset, 0, 16),
161 };
162
163 /* Determine the number of vertices per primitive. */
164 unsigned num_vertices;
165 LLVMValueRef num_vertices_val;
166
167 if (ctx->type == PIPE_SHADER_VERTEX) {
168 if (info->properties[TGSI_PROPERTY_VS_BLIT_SGPRS]) {
169 /* Blits always use axis-aligned rectangles with 3 vertices. */
170 num_vertices = 3;
171 num_vertices_val = LLVMConstInt(ctx->i32, 3, 0);
172 } else {
173 /* Extract OUTPRIM field. */
174 tmp = si_unpack_param(ctx, ctx->param_vs_state_bits, 2, 2);
175 num_vertices_val = LLVMBuildAdd(builder, tmp, ctx->i32_1, "");
176 num_vertices = 3; /* TODO: optimize for points & lines */
177 }
178 } else {
179 assert(ctx->type == PIPE_SHADER_TESS_EVAL);
180
181 if (info->properties[TGSI_PROPERTY_TES_POINT_MODE])
182 num_vertices = 1;
183 else if (info->properties[TGSI_PROPERTY_TES_PRIM_MODE] == PIPE_PRIM_LINES)
184 num_vertices = 2;
185 else
186 num_vertices = 3;
187
188 num_vertices_val = LLVMConstInt(ctx->i32, num_vertices, false);
189 }
190
191 /* TODO: streamout */
192
193 /* TODO: primitive culling */
194
195 build_sendmsg_gs_alloc_req(ctx, ngg_get_vtx_cnt(ctx), ngg_get_prim_cnt(ctx));
196
197 /* Export primitive data to the index buffer. Format is:
198 * - bits 0..8: index 0
199 * - bit 9: edge flag 0
200 * - bits 10..18: index 1
201 * - bit 19: edge flag 1
202 * - bits 20..28: index 2
203 * - bit 29: edge flag 2
204 * - bit 31: null primitive (skip)
205 *
206 * For the first version, we will always build up all three indices
207 * independent of the primitive type. The additional garbage data
208 * shouldn't hurt.
209 *
210 * TODO: culling depends on the primitive type, so can have some
211 * interaction here.
212 */
213 lp_build_if(&if_state, &ctx->gallivm, is_gs_thread);
214 {
215 struct ngg_prim prim = {};
216
217 prim.num_vertices = num_vertices;
218 prim.isnull = ctx->ac.i1false;
219 memcpy(prim.index, vtxindex, sizeof(vtxindex[0]) * 3);
220
221 for (unsigned i = 0; i < num_vertices; ++i) {
222 tmp = LLVMBuildLShr(builder, ctx->abi.gs_invocation_id,
223 LLVMConstInt(ctx->ac.i32, 8 + i, false), "");
224 prim.edgeflag[i] = LLVMBuildTrunc(builder, tmp, ctx->ac.i1, "");
225 }
226
227 build_export_prim(ctx, &prim);
228 }
229 lp_build_endif(&if_state);
230
231 /* Export per-vertex data (positions and parameters). */
232 lp_build_if(&if_state, &ctx->gallivm, is_es_thread);
233 {
234 unsigned i;
235
236 /* Unconditionally (re-)load the values for proper SSA form. */
237 for (i = 0; i < info->num_outputs; i++) {
238 for (unsigned j = 0; j < 4; j++) {
239 outputs[i].values[j] =
240 LLVMBuildLoad(builder,
241 addrs[4 * i + j],
242 "");
243 }
244 }
245
246 /* TODO: Vertex shaders have to get PrimitiveID from GS VGPRs. */
247 if (ctx->type == PIPE_SHADER_TESS_EVAL &&
248 ctx->shader->key.mono.u.vs_export_prim_id) {
249 outputs[i].semantic_name = TGSI_SEMANTIC_PRIMID;
250 outputs[i].semantic_index = 0;
251 outputs[i].values[0] = ac_to_float(&ctx->ac, si_get_primitive_id(ctx, 0));
252 for (unsigned j = 1; j < 4; j++)
253 outputs[i].values[j] = LLVMGetUndef(ctx->f32);
254
255 memset(outputs[i].vertex_stream, 0,
256 sizeof(outputs[i].vertex_stream));
257 i++;
258 }
259
260 si_llvm_export_vs(ctx, outputs, i);
261 }
262 lp_build_endif(&if_state);
263
264 FREE(outputs);
265 }