static LLVMValueRef ngg_get_initial_edgeflag(struct si_shader_context *ctx, unsigned index)
{
- if (ctx->type == PIPE_SHADER_VERTEX) {
+ if (ctx->stage == MESA_SHADER_VERTEX) {
LLVMValueRef tmp;
tmp = LLVMBuildLShr(ctx->ac.builder, ac_get_arg(&ctx->ac, ctx->args.gs_invocation_id),
LLVMConstInt(ctx->ac.i32, 8 + index, false), "");
{
const struct si_shader_info *info = &ctx->shader->selector->info;
- if (ctx->type == PIPE_SHADER_VERTEX) {
+ if (ctx->stage == MESA_SHADER_VERTEX) {
if (info->properties[TGSI_PROPERTY_VS_BLIT_SGPRS_AMD]) {
/* Blits always use axis-aligned rectangles with 3 vertices. */
*num_vertices = 3;
return LLVMBuildAdd(ctx->ac.builder, num, ctx->ac.i32_1, "");
}
} else {
- assert(ctx->type == PIPE_SHADER_TESS_EVAL);
+ assert(ctx->stage == MESA_SHADER_TESS_EVAL);
if (info->properties[TGSI_PROPERTY_TES_POINT_MODE])
*num_vertices = 1;
assert(shader->key.as_ngg && !shader->key.as_es);
- return sel->type != PIPE_SHADER_GEOMETRY && !sel->info.writes_edgeflag;
+ return sel->info.stage != MESA_SHADER_GEOMETRY && !sel->info.writes_edgeflag;
}
void gfx10_ngg_build_sendmsg_gs_alloc_req(struct si_shader_context *ctx)
unsigned reg = so->output[i].register_index;
struct si_shader_output_values out;
- out.semantic_name = info->output_semantic_name[reg];
- out.semantic_index = info->output_semantic_index[reg];
+ out.semantic = info->output_semantic[reg];
for (unsigned comp = 0; comp < 4; comp++) {
tmp = ac_build_gep0(&ctx->ac, vertexptr, LLVMConstInt(ctx->ac.i32, 4 * reg + comp, false));
LLVMValueRef prim_stride_dw_vgpr = LLVMGetUndef(ctx->ac.i32);
int stream_for_buffer[4] = {-1, -1, -1, -1};
unsigned bufmask_for_stream[4] = {};
- bool isgs = ctx->type == PIPE_SHADER_GEOMETRY;
+ bool isgs = ctx->stage == MESA_SHADER_GEOMETRY;
unsigned scratch_emit_base = isgs ? 4 : 0;
LLVMValueRef scratch_emit_basev = isgs ? i32_4 : ctx->ac.i32_0;
unsigned scratch_offset_base = isgs ? 8 : 4;
* to the ES thread of the provoking vertex. All ES threads
* load and export PrimitiveID for their thread.
*/
- if (shader->selector->type == PIPE_SHADER_VERTEX && shader->key.mono.u.vs_export_prim_id)
+ if (shader->selector->info.stage == MESA_SHADER_VERTEX && shader->key.mono.u.vs_export_prim_id)
lds_vertex_size = MAX2(lds_vertex_size, 1);
if (shader->key.opt.ngg_culling) {
- if (shader->selector->type == PIPE_SHADER_VERTEX) {
+ if (shader->selector->info.stage == MESA_SHADER_VERTEX) {
STATIC_ASSERT(lds_instance_id + 1 == 9);
lds_vertex_size = MAX2(lds_vertex_size, 9);
} else {
- assert(shader->selector->type == PIPE_SHADER_TESS_EVAL);
+ assert(shader->selector->info.stage == MESA_SHADER_TESS_EVAL);
if (shader->selector->info.uses_primid || shader->key.mono.u.vs_export_prim_id) {
STATIC_ASSERT(lds_tes_patch_id + 2 == 11);
assert(shader->key.opt.ngg_culling);
assert(shader->key.as_ngg);
- assert(sel->type == PIPE_SHADER_VERTEX ||
- (sel->type == PIPE_SHADER_TESS_EVAL && !shader->key.as_es));
+ assert(sel->info.stage == MESA_SHADER_VERTEX ||
+ (sel->info.stage == MESA_SHADER_TESS_EVAL && !shader->key.as_es));
LLVMValueRef position[4] = {};
for (unsigned i = 0; i < info->num_outputs; i++) {
- switch (info->output_semantic_name[i]) {
- case TGSI_SEMANTIC_POSITION:
+ switch (info->output_semantic[i]) {
+ case VARYING_SLOT_POS:
for (unsigned j = 0; j < 4; j++) {
position[j] = LLVMBuildLoad(ctx->ac.builder, addrs[4 * i + j], "");
}
bool uses_tes_prim_id = false;
LLVMValueRef packed_data = ctx->ac.i32_0;
- if (ctx->type == PIPE_SHADER_VERTEX) {
+ if (ctx->stage == MESA_SHADER_VERTEX) {
uses_instance_id = sel->info.uses_instanceid ||
shader->key.part.vs.prolog.instance_divisor_is_one ||
shader->key.part.vs.prolog.instance_divisor_is_fetched;
} else {
uses_tes_prim_id = sel->info.uses_primid || shader->key.mono.u.vs_export_prim_id;
- assert(ctx->type == PIPE_SHADER_TESS_EVAL);
+ assert(ctx->stage == MESA_SHADER_TESS_EVAL);
LLVMBuildStore(builder, ac_to_integer(&ctx->ac, ac_get_arg(&ctx->ac, ctx->tes_u)),
ac_build_gep0(&ctx->ac, es_vtxptr, LLVMConstInt(ctx->ac.i32, lds_tes_u, 0)));
LLVMBuildStore(builder, ac_to_integer(&ctx->ac, ac_get_arg(&ctx->ac, ctx->tes_v)),
LLVMBuildStore(builder, tmp, es_data[i]);
}
- if (ctx->type == PIPE_SHADER_TESS_EVAL) {
+ if (ctx->stage == MESA_SHADER_TESS_EVAL) {
tmp = LLVMBuildLoad(builder,
si_build_gep_i8(ctx, old_es_vtxptr, lds_byte2_tes_rel_patch_id), "");
tmp = LLVMBuildZExt(builder, tmp, ctx->ac.i32, "");
ret = LLVMBuildInsertValue(ctx->ac.builder, ret, new_gs_tg_info, 2, "");
ret = LLVMBuildInsertValue(ctx->ac.builder, ret, new_merged_wave_info, 3, "");
- if (ctx->type == PIPE_SHADER_TESS_EVAL)
+ if (ctx->stage == MESA_SHADER_TESS_EVAL)
ret = si_insert_input_ret(ctx, ret, ctx->tcs_offchip_offset, 4);
ret = si_insert_input_ptr(ctx, ret, ctx->rw_buffers, 8 + SI_SGPR_RW_BUFFERS);
ret = si_insert_input_ptr(ctx, ret, ctx->samplers_and_images, 8 + SI_SGPR_SAMPLERS_AND_IMAGES);
ret = si_insert_input_ptr(ctx, ret, ctx->vs_state_bits, 8 + SI_SGPR_VS_STATE_BITS);
- if (ctx->type == PIPE_SHADER_VERTEX) {
+ if (ctx->stage == MESA_SHADER_VERTEX) {
ret = si_insert_input_ptr(ctx, ret, ctx->args.base_vertex, 8 + SI_SGPR_BASE_VERTEX);
ret = si_insert_input_ptr(ctx, ret, ctx->args.start_instance, 8 + SI_SGPR_START_INSTANCE);
ret = si_insert_input_ptr(ctx, ret, ctx->args.draw_id, 8 + SI_SGPR_DRAWID);
8 + SI_SGPR_VS_VB_DESCRIPTOR_FIRST + i * 4);
}
} else {
- assert(ctx->type == PIPE_SHADER_TESS_EVAL);
+ assert(ctx->stage == MESA_SHADER_TESS_EVAL);
ret = si_insert_input_ptr(ctx, ret, ctx->tcs_offchip_layout, 8 + SI_SGPR_TES_OFFCHIP_LAYOUT);
ret = si_insert_input_ptr(ctx, ret, ctx->tes_offchip_addr, 8 + SI_SGPR_TES_OFFCHIP_ADDR);
}
unsigned vgpr;
- if (ctx->type == PIPE_SHADER_VERTEX) {
+ if (ctx->stage == MESA_SHADER_VERTEX) {
if (shader->selector->num_vbos_in_user_sgprs) {
vgpr = 8 + SI_SGPR_VS_VB_DESCRIPTOR_FIRST + shader->selector->num_vbos_in_user_sgprs * 4;
} else {
ret = si_insert_input_ret_float(ctx, ret, ctx->args.gs_invocation_id, vgpr++);
vgpr++; /* gs_vtx45_offset */
- if (ctx->type == PIPE_SHADER_VERTEX) {
+ if (ctx->stage == MESA_SHADER_VERTEX) {
val = LLVMBuildLoad(builder, es_data[0], "");
ret = LLVMBuildInsertValue(builder, ret, ac_to_float(&ctx->ac, val), vgpr++,
""); /* VGPR5 - VertexID */
vgpr++;
}
} else {
- assert(ctx->type == PIPE_SHADER_TESS_EVAL);
+ assert(ctx->stage == MESA_SHADER_TESS_EVAL);
unsigned num_vgprs = uses_tes_prim_id ? 4 : 3;
for (unsigned i = 0; i < num_vgprs; i++) {
val = LLVMBuildLoad(builder, es_data[i], "");
/* These two also use LDS. */
if (sel->info.writes_edgeflag ||
- (ctx->type == PIPE_SHADER_VERTEX && shader->key.mono.u.vs_export_prim_id))
+ (ctx->stage == MESA_SHADER_VERTEX && shader->key.mono.u.vs_export_prim_id))
ac_build_s_barrier(&ctx->ac);
ctx->return_value = ret;
vertex_ptr = ngg_nogs_vertex_ptr(ctx, get_thread_id_in_tg(ctx));
for (unsigned i = 0; i < info->num_outputs; i++) {
- outputs[i].semantic_name = info->output_semantic_name[i];
- outputs[i].semantic_index = info->output_semantic_index[i];
+ outputs[i].semantic = info->output_semantic[i];
for (unsigned j = 0; j < 4; j++) {
outputs[i].vertex_stream[j] = (info->output_streams[i] >> (2 * j)) & 3;
}
/* Store the edgeflag at the end (if streamout is enabled) */
- if (info->output_semantic_name[i] == TGSI_SEMANTIC_EDGEFLAG && sel->info.writes_edgeflag) {
+ if (info->output_semantic[i] == VARYING_SLOT_EDGE && sel->info.writes_edgeflag) {
LLVMValueRef edgeflag = LLVMBuildLoad(builder, addrs[4 * i], "");
/* The output is a float, but the hw expects a 1-bit integer. */
edgeflag = LLVMBuildFPToUI(ctx->ac.builder, edgeflag, ctx->ac.i32, "");
bool unterminated_es_if_block =
!sel->so.num_outputs && !sel->info.writes_edgeflag &&
!ctx->screen->use_ngg_streamout && /* no query buffer */
- (ctx->type != PIPE_SHADER_VERTEX || !ctx->shader->key.mono.u.vs_export_prim_id);
+ (ctx->stage != MESA_SHADER_VERTEX || !ctx->shader->key.mono.u.vs_export_prim_id);
if (!unterminated_es_if_block)
ac_build_endif(&ctx->ac, ctx->merged_wrap_if_label);
/* Copy Primitive IDs from GS threads to the LDS address corresponding
* to the ES thread of the provoking vertex.
*/
- if (ctx->type == PIPE_SHADER_VERTEX && ctx->shader->key.mono.u.vs_export_prim_id) {
+ if (ctx->stage == MESA_SHADER_VERTEX && ctx->shader->key.mono.u.vs_export_prim_id) {
assert(!unterminated_es_if_block);
/* Streamout and edge flags use LDS. Make it idle, so that we can reuse it. */
* use the position from the current shader part. Instead,
* load it from LDS.
*/
- if (info->output_semantic_name[i] == TGSI_SEMANTIC_POSITION &&
+ if (info->output_semantic[i] == VARYING_SLOT_POS &&
ctx->shader->key.opt.ngg_culling) {
vertex_ptr = ngg_nogs_vertex_ptr(ctx, ac_get_arg(&ctx->ac, ctx->ngg_old_thread_id));
}
if (ctx->shader->key.mono.u.vs_export_prim_id) {
- outputs[i].semantic_name = TGSI_SEMANTIC_PRIMID;
- outputs[i].semantic_index = 0;
+ outputs[i].semantic = VARYING_SLOT_PRIMITIVE_ID;
- if (ctx->type == PIPE_SHADER_VERTEX) {
+ if (ctx->stage == MESA_SHADER_VERTEX) {
/* Wait for GS stores to finish. */
ac_build_s_barrier(&ctx->ac);
tmp = ac_build_gep0(&ctx->ac, tmp, ctx->ac.i32_0);
outputs[i].values[0] = LLVMBuildLoad(builder, tmp, "");
} else {
- assert(ctx->type == PIPE_SHADER_TESS_EVAL);
+ assert(ctx->stage == MESA_SHADER_TESS_EVAL);
outputs[i].values[0] = si_get_primitive_id(ctx, 0);
}
unsigned out_idx = 0;
for (unsigned i = 0; i < info->num_outputs; i++) {
- outputs[i].semantic_name = info->output_semantic_name[i];
- outputs[i].semantic_index = info->output_semantic_index[i];
+ outputs[i].semantic = info->output_semantic[i];
for (unsigned j = 0; j < 4; j++, out_idx++) {
tmp = ngg_gs_get_emit_output_ptr(ctx, vertexptr, out_idx);
*max_gsprims = MIN2(*max_gsprims, 1 + max_reuse);
}
+/* Return the per-workgroup NGG scratch size in dwords.
+ *
+ * A geometry shader that has streamout outputs needs the larger 44-dword
+ * scratch area; every other NGG configuration uses 8 dwords.
+ * NOTE(review): the exact meaning of the 44/8 dword layouts is not visible
+ * here — presumably streamout bookkeeping vs. the basic NGG scratch; the
+ * subgroup-info code below subtracts this from the 8K-dword LDS budget.
+ */
+unsigned gfx10_ngg_get_scratch_dw_size(struct si_shader *shader)
+{
+ const struct si_shader_selector *sel = shader->selector;
+
+ if (sel->info.stage == MESA_SHADER_GEOMETRY && sel->so.num_outputs)
+ return 44;
+
+ return 8;
+}
+
/**
* Determine subgroup information like maximum number of vertices and prims.
*
const struct si_shader_selector *gs_sel = shader->selector;
const struct si_shader_selector *es_sel =
shader->previous_stage_sel ? shader->previous_stage_sel : gs_sel;
- const enum pipe_shader_type gs_type = gs_sel->type;
+ const gl_shader_stage gs_stage = gs_sel->info.stage;
const unsigned gs_num_invocations = MAX2(gs_sel->gs_num_invocations, 1);
const unsigned input_prim = si_get_input_prim(gs_sel);
const bool use_adjacency =
input_prim >= PIPE_PRIM_LINES_ADJACENCY && input_prim <= PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY;
const unsigned max_verts_per_prim = u_vertices_per_prim(input_prim);
- const unsigned min_verts_per_prim = gs_type == PIPE_SHADER_GEOMETRY ? max_verts_per_prim : 1;
+ const unsigned min_verts_per_prim = gs_stage == MESA_SHADER_GEOMETRY ? max_verts_per_prim : 1;
/* All these are in dwords: */
- /* We can't allow using the whole LDS, because GS waves compete with
- * other shader stages for LDS space.
- *
- * TODO: We should really take the shader's internal LDS use into
- * account. The linker will fail if the size is greater than
- * 8K dwords.
+ /* GE can only use 8K dwords (32KB) of LDS per workgroup.
*/
- const unsigned max_lds_size = 8 * 1024 - 768;
+ const unsigned max_lds_size = 8 * 1024 - gfx10_ngg_get_scratch_dw_size(shader);
const unsigned target_lds_size = max_lds_size;
unsigned esvert_lds_size = 0;
unsigned gsprim_lds_size = 0;
*/
max_esverts_base = MIN2(max_esverts_base, 251 + max_verts_per_prim - 1);
- if (gs_type == PIPE_SHADER_GEOMETRY) {
+ if (gs_stage == MESA_SHADER_GEOMETRY) {
bool force_multi_cycling = false;
unsigned max_out_verts_per_gsprim = gs_sel->gs_max_out_vertices * gs_num_invocations;
gsprim_lds_size = (gs_sel->gsvs_vertex_size / 4 + 1) * max_out_verts_per_gsprim;
if (gsprim_lds_size > target_lds_size && !force_multi_cycling) {
- if (gs_sel->tess_turns_off_ngg || es_sel->type != PIPE_SHADER_TESS_EVAL) {
+ if (gs_sel->tess_turns_off_ngg || es_sel->info.stage != MESA_SHADER_TESS_EVAL) {
force_multi_cycling = true;
goto retry_select_mode;
}
max_esverts =
MIN2(max_esverts, (max_lds_size - max_gsprims * gsprim_lds_size) / esvert_lds_size);
max_esverts = MIN2(max_esverts, max_gsprims * max_verts_per_prim);
+ /* Hardware restriction: minimum value of max_esverts */
+ max_esverts = MAX2(max_esverts, min_esverts - 1 + max_verts_per_prim);
max_gsprims = align(max_gsprims, wavesize);
max_gsprims = MIN2(max_gsprims, max_gsprims_base);
- if (gsprim_lds_size)
+ if (gsprim_lds_size) {
+ /* Don't count unusable vertices to the LDS size. Those are vertices above
+ * the maximum number of vertices that can occur in the workgroup,
+ * which is e.g. max_gsprims * 3 for triangles.
+ */
+ unsigned usable_esverts = MIN2(max_esverts, max_gsprims * max_verts_per_prim);
max_gsprims =
- MIN2(max_gsprims, (max_lds_size - max_esverts * esvert_lds_size) / gsprim_lds_size);
+ MIN2(max_gsprims, (max_lds_size - usable_esverts * esvert_lds_size) / gsprim_lds_size);
+ }
clamp_gsprims_to_esverts(&max_gsprims, max_esverts, min_verts_per_prim, use_adjacency);
assert(max_esverts >= max_verts_per_prim && max_gsprims >= 1);
} while (orig_max_esverts != max_esverts || orig_max_gsprims != max_gsprims);
- }
- /* Hardware restriction: minimum value of max_esverts */
- max_esverts = MAX2(max_esverts, min_esverts - 1 + max_verts_per_prim);
+ /* Verify the restriction. */
+ assert(max_esverts >= min_esverts - 1 + max_verts_per_prim);
+ } else {
+ /* Hardware restriction: minimum value of max_esverts */
+ max_esverts = MAX2(max_esverts, min_esverts - 1 + max_verts_per_prim);
+ }
unsigned max_out_vertices =
max_vert_out_per_gs_instance
? gs_sel->gs_max_out_vertices
- : gs_type == PIPE_SHADER_GEOMETRY
+ : gs_stage == MESA_SHADER_GEOMETRY
? max_gsprims * gs_num_invocations * gs_sel->gs_max_out_vertices
: max_esverts;
assert(max_out_vertices <= 256);
unsigned prim_amp_factor = 1;
- if (gs_type == PIPE_SHADER_GEOMETRY) {
+ if (gs_stage == MESA_SHADER_GEOMETRY) {
/* Number of output primitives per GS input primitive after
* GS instancing. */
prim_amp_factor = gs_sel->gs_max_out_vertices;
shader->ngg.prim_amp_factor = prim_amp_factor;
shader->ngg.max_vert_out_per_gs_instance = max_vert_out_per_gs_instance;
- shader->gs_info.esgs_ring_size = max_esverts * esvert_lds_size;
+ /* Don't count unusable vertices. */
+ shader->gs_info.esgs_ring_size = MIN2(max_esverts, max_gsprims * max_verts_per_prim) *
+ esvert_lds_size;
shader->ngg.ngg_emit_size = max_gsprims * gsprim_lds_size;
assert(shader->ngg.hw_max_esverts >= min_esverts); /* HW limitation */