static LLVMValueRef ngg_get_initial_edgeflag(struct si_shader_context *ctx, unsigned index)
{
- if (ctx->type == PIPE_SHADER_VERTEX) {
+ if (ctx->stage == MESA_SHADER_VERTEX) {
LLVMValueRef tmp;
tmp = LLVMBuildLShr(ctx->ac.builder, ac_get_arg(&ctx->ac, ctx->args.gs_invocation_id),
LLVMConstInt(ctx->ac.i32, 8 + index, false), "");
{
const struct si_shader_info *info = &ctx->shader->selector->info;
- if (ctx->type == PIPE_SHADER_VERTEX) {
- if (info->properties[TGSI_PROPERTY_VS_BLIT_SGPRS_AMD]) {
+ if (ctx->stage == MESA_SHADER_VERTEX) {
+ if (info->base.vs.blit_sgprs_amd) {
/* Blits always use axis-aligned rectangles with 3 vertices. */
*num_vertices = 3;
return LLVMConstInt(ctx->ac.i32, 3, 0);
return LLVMBuildAdd(ctx->ac.builder, num, ctx->ac.i32_1, "");
}
} else {
- assert(ctx->type == PIPE_SHADER_TESS_EVAL);
+ assert(ctx->stage == MESA_SHADER_TESS_EVAL);
- if (info->properties[TGSI_PROPERTY_TES_POINT_MODE])
+ if (info->base.tess.point_mode)
*num_vertices = 1;
- else if (info->properties[TGSI_PROPERTY_TES_PRIM_MODE] == PIPE_PRIM_LINES)
+ else if (info->base.tess.primitive_mode == GL_LINES)
*num_vertices = 2;
else
*num_vertices = 3;
assert(shader->key.as_ngg && !shader->key.as_es);
- return sel->type != PIPE_SHADER_GEOMETRY && !sel->info.writes_edgeflag;
+ return sel->info.stage != MESA_SHADER_GEOMETRY && !sel->info.writes_edgeflag;
}
void gfx10_ngg_build_sendmsg_gs_alloc_req(struct si_shader_context *ctx)
unsigned reg = so->output[i].register_index;
struct si_shader_output_values out;
- out.semantic_name = info->output_semantic_name[reg];
- out.semantic_index = info->output_semantic_index[reg];
+ out.semantic = info->output_semantic[reg];
for (unsigned comp = 0; comp < 4; comp++) {
tmp = ac_build_gep0(&ctx->ac, vertexptr, LLVMConstInt(ctx->ac.i32, 4 * reg + comp, false));
LLVMValueRef prim_stride_dw_vgpr = LLVMGetUndef(ctx->ac.i32);
int stream_for_buffer[4] = {-1, -1, -1, -1};
unsigned bufmask_for_stream[4] = {};
- bool isgs = ctx->type == PIPE_SHADER_GEOMETRY;
+ bool isgs = ctx->stage == MESA_SHADER_GEOMETRY;
unsigned scratch_emit_base = isgs ? 4 : 0;
LLVMValueRef scratch_emit_basev = isgs ? i32_4 : ctx->ac.i32_0;
unsigned scratch_offset_base = isgs ? 8 : 4;
* to the ES thread of the provoking vertex. All ES threads
* load and export PrimitiveID for their thread.
*/
- if (shader->selector->type == PIPE_SHADER_VERTEX && shader->key.mono.u.vs_export_prim_id)
+ if (shader->selector->info.stage == MESA_SHADER_VERTEX && shader->key.mono.u.vs_export_prim_id)
lds_vertex_size = MAX2(lds_vertex_size, 1);
if (shader->key.opt.ngg_culling) {
- if (shader->selector->type == PIPE_SHADER_VERTEX) {
+ if (shader->selector->info.stage == MESA_SHADER_VERTEX) {
STATIC_ASSERT(lds_instance_id + 1 == 9);
lds_vertex_size = MAX2(lds_vertex_size, 9);
} else {
- assert(shader->selector->type == PIPE_SHADER_TESS_EVAL);
+ assert(shader->selector->info.stage == MESA_SHADER_TESS_EVAL);
if (shader->selector->info.uses_primid || shader->key.mono.u.vs_export_prim_id) {
STATIC_ASSERT(lds_tes_patch_id + 2 == 11);
assert(shader->key.opt.ngg_culling);
assert(shader->key.as_ngg);
- assert(sel->type == PIPE_SHADER_VERTEX ||
- (sel->type == PIPE_SHADER_TESS_EVAL && !shader->key.as_es));
+ assert(sel->info.stage == MESA_SHADER_VERTEX ||
+ (sel->info.stage == MESA_SHADER_TESS_EVAL && !shader->key.as_es));
LLVMValueRef position[4] = {};
for (unsigned i = 0; i < info->num_outputs; i++) {
- switch (info->output_semantic_name[i]) {
- case TGSI_SEMANTIC_POSITION:
+ switch (info->output_semantic[i]) {
+ case VARYING_SLOT_POS:
for (unsigned j = 0; j < 4; j++) {
position[j] = LLVMBuildLoad(ctx->ac.builder, addrs[4 * i + j], "");
}
bool uses_tes_prim_id = false;
LLVMValueRef packed_data = ctx->ac.i32_0;
- if (ctx->type == PIPE_SHADER_VERTEX) {
+ if (ctx->stage == MESA_SHADER_VERTEX) {
uses_instance_id = sel->info.uses_instanceid ||
shader->key.part.vs.prolog.instance_divisor_is_one ||
shader->key.part.vs.prolog.instance_divisor_is_fetched;
} else {
uses_tes_prim_id = sel->info.uses_primid || shader->key.mono.u.vs_export_prim_id;
- assert(ctx->type == PIPE_SHADER_TESS_EVAL);
+ assert(ctx->stage == MESA_SHADER_TESS_EVAL);
LLVMBuildStore(builder, ac_to_integer(&ctx->ac, ac_get_arg(&ctx->ac, ctx->tes_u)),
ac_build_gep0(&ctx->ac, es_vtxptr, LLVMConstInt(ctx->ac.i32, lds_tes_u, 0)));
LLVMBuildStore(builder, ac_to_integer(&ctx->ac, ac_get_arg(&ctx->ac, ctx->tes_v)),
LLVMBuildStore(builder, tmp, es_data[i]);
}
- if (ctx->type == PIPE_SHADER_TESS_EVAL) {
+ if (ctx->stage == MESA_SHADER_TESS_EVAL) {
tmp = LLVMBuildLoad(builder,
si_build_gep_i8(ctx, old_es_vtxptr, lds_byte2_tes_rel_patch_id), "");
tmp = LLVMBuildZExt(builder, tmp, ctx->ac.i32, "");
ret = LLVMBuildInsertValue(ctx->ac.builder, ret, new_gs_tg_info, 2, "");
ret = LLVMBuildInsertValue(ctx->ac.builder, ret, new_merged_wave_info, 3, "");
- if (ctx->type == PIPE_SHADER_TESS_EVAL)
+ if (ctx->stage == MESA_SHADER_TESS_EVAL)
ret = si_insert_input_ret(ctx, ret, ctx->tcs_offchip_offset, 4);
ret = si_insert_input_ptr(ctx, ret, ctx->rw_buffers, 8 + SI_SGPR_RW_BUFFERS);
ret = si_insert_input_ptr(ctx, ret, ctx->samplers_and_images, 8 + SI_SGPR_SAMPLERS_AND_IMAGES);
ret = si_insert_input_ptr(ctx, ret, ctx->vs_state_bits, 8 + SI_SGPR_VS_STATE_BITS);
- if (ctx->type == PIPE_SHADER_VERTEX) {
+ if (ctx->stage == MESA_SHADER_VERTEX) {
ret = si_insert_input_ptr(ctx, ret, ctx->args.base_vertex, 8 + SI_SGPR_BASE_VERTEX);
ret = si_insert_input_ptr(ctx, ret, ctx->args.start_instance, 8 + SI_SGPR_START_INSTANCE);
ret = si_insert_input_ptr(ctx, ret, ctx->args.draw_id, 8 + SI_SGPR_DRAWID);
8 + SI_SGPR_VS_VB_DESCRIPTOR_FIRST + i * 4);
}
} else {
- assert(ctx->type == PIPE_SHADER_TESS_EVAL);
+ assert(ctx->stage == MESA_SHADER_TESS_EVAL);
ret = si_insert_input_ptr(ctx, ret, ctx->tcs_offchip_layout, 8 + SI_SGPR_TES_OFFCHIP_LAYOUT);
ret = si_insert_input_ptr(ctx, ret, ctx->tes_offchip_addr, 8 + SI_SGPR_TES_OFFCHIP_ADDR);
}
unsigned vgpr;
- if (ctx->type == PIPE_SHADER_VERTEX) {
+ if (ctx->stage == MESA_SHADER_VERTEX) {
if (shader->selector->num_vbos_in_user_sgprs) {
vgpr = 8 + SI_SGPR_VS_VB_DESCRIPTOR_FIRST + shader->selector->num_vbos_in_user_sgprs * 4;
} else {
ret = si_insert_input_ret_float(ctx, ret, ctx->args.gs_invocation_id, vgpr++);
vgpr++; /* gs_vtx45_offset */
- if (ctx->type == PIPE_SHADER_VERTEX) {
+ if (ctx->stage == MESA_SHADER_VERTEX) {
val = LLVMBuildLoad(builder, es_data[0], "");
ret = LLVMBuildInsertValue(builder, ret, ac_to_float(&ctx->ac, val), vgpr++,
""); /* VGPR5 - VertexID */
vgpr++;
}
} else {
- assert(ctx->type == PIPE_SHADER_TESS_EVAL);
+ assert(ctx->stage == MESA_SHADER_TESS_EVAL);
unsigned num_vgprs = uses_tes_prim_id ? 4 : 3;
for (unsigned i = 0; i < num_vgprs; i++) {
val = LLVMBuildLoad(builder, es_data[i], "");
/* These two also use LDS. */
if (sel->info.writes_edgeflag ||
- (ctx->type == PIPE_SHADER_VERTEX && shader->key.mono.u.vs_export_prim_id))
+ (ctx->stage == MESA_SHADER_VERTEX && shader->key.mono.u.vs_export_prim_id))
ac_build_s_barrier(&ctx->ac);
ctx->return_value = ret;
vertex_ptr = ngg_nogs_vertex_ptr(ctx, get_thread_id_in_tg(ctx));
for (unsigned i = 0; i < info->num_outputs; i++) {
- outputs[i].semantic_name = info->output_semantic_name[i];
- outputs[i].semantic_index = info->output_semantic_index[i];
+ outputs[i].semantic = info->output_semantic[i];
for (unsigned j = 0; j < 4; j++) {
outputs[i].vertex_stream[j] = (info->output_streams[i] >> (2 * j)) & 3;
}
/* Store the edgeflag at the end (if streamout is enabled) */
- if (info->output_semantic_name[i] == TGSI_SEMANTIC_EDGEFLAG && sel->info.writes_edgeflag) {
+ if (info->output_semantic[i] == VARYING_SLOT_EDGE && sel->info.writes_edgeflag) {
LLVMValueRef edgeflag = LLVMBuildLoad(builder, addrs[4 * i], "");
/* The output is a float, but the hw expects a 1-bit integer. */
edgeflag = LLVMBuildFPToUI(ctx->ac.builder, edgeflag, ctx->ac.i32, "");
bool unterminated_es_if_block =
!sel->so.num_outputs && !sel->info.writes_edgeflag &&
!ctx->screen->use_ngg_streamout && /* no query buffer */
- (ctx->type != PIPE_SHADER_VERTEX || !ctx->shader->key.mono.u.vs_export_prim_id);
+ (ctx->stage != MESA_SHADER_VERTEX || !ctx->shader->key.mono.u.vs_export_prim_id);
if (!unterminated_es_if_block)
ac_build_endif(&ctx->ac, ctx->merged_wrap_if_label);
/* Copy Primitive IDs from GS threads to the LDS address corresponding
* to the ES thread of the provoking vertex.
*/
- if (ctx->type == PIPE_SHADER_VERTEX && ctx->shader->key.mono.u.vs_export_prim_id) {
+ if (ctx->stage == MESA_SHADER_VERTEX && ctx->shader->key.mono.u.vs_export_prim_id) {
assert(!unterminated_es_if_block);
/* Streamout and edge flags use LDS. Make it idle, so that we can reuse it. */
}
/* Update query buffer */
- if (ctx->screen->use_ngg_streamout && !info->properties[TGSI_PROPERTY_VS_BLIT_SGPRS_AMD]) {
+ if (ctx->screen->use_ngg_streamout && !info->base.vs.blit_sgprs_amd) {
assert(!unterminated_es_if_block);
tmp = si_unpack_param(ctx, ctx->vs_state_bits, 6, 1);
* use the position from the current shader part. Instead,
* load it from LDS.
*/
- if (info->output_semantic_name[i] == TGSI_SEMANTIC_POSITION &&
+ if (info->output_semantic[i] == VARYING_SLOT_POS &&
ctx->shader->key.opt.ngg_culling) {
vertex_ptr = ngg_nogs_vertex_ptr(ctx, ac_get_arg(&ctx->ac, ctx->ngg_old_thread_id));
}
if (ctx->shader->key.mono.u.vs_export_prim_id) {
- outputs[i].semantic_name = TGSI_SEMANTIC_PRIMID;
- outputs[i].semantic_index = 0;
+ outputs[i].semantic = VARYING_SLOT_PRIMITIVE_ID;
- if (ctx->type == PIPE_SHADER_VERTEX) {
+ if (ctx->stage == MESA_SHADER_VERTEX) {
/* Wait for GS stores to finish. */
ac_build_s_barrier(&ctx->ac);
tmp = ac_build_gep0(&ctx->ac, tmp, ctx->ac.i32_0);
outputs[i].values[0] = LLVMBuildLoad(builder, tmp, "");
} else {
- assert(ctx->type == PIPE_SHADER_TESS_EVAL);
+ assert(ctx->stage == MESA_SHADER_TESS_EVAL);
outputs[i].values[0] = si_get_primitive_id(ctx, 0);
}
* is in emit order; that is:
* - during the epilogue, N is the threadidx (relative to the entire threadgroup)
* - during vertex emit, i.e. while the API GS shader invocation is running,
- * N = threadidx * gs_max_out_vertices + emitidx
+ * N = threadidx * gs.vertices_out + emitidx
*
* Goals of the LDS memory layout:
* 1. Eliminate bank conflicts on write for geometry shaders that have all emits
*
* Swizzling is required to satisfy points 1 and 2 simultaneously.
*
- * Vertices are stored in export order (gsthread * gs_max_out_vertices + emitidx).
+ * Vertices are stored in export order (gsthread * gs.vertices_out + emitidx).
* Indices are swizzled in groups of 32, which ensures point 1 without
* disturbing point 2.
*
LLVMBuilderRef builder = ctx->ac.builder;
LLVMValueRef storage = ngg_gs_get_vertex_storage(ctx);
- /* gs_max_out_vertices = 2^(write_stride_2exp) * some odd number */
- unsigned write_stride_2exp = ffs(sel->gs_max_out_vertices) - 1;
+ /* gs.vertices_out = 2^(write_stride_2exp) * some odd number */
+ unsigned write_stride_2exp = ffs(sel->info.base.gs.vertices_out) - 1;
if (write_stride_2exp) {
LLVMValueRef row = LLVMBuildLShr(builder, vertexidx, LLVMConstInt(ctx->ac.i32, 5, false), "");
LLVMValueRef swizzle = LLVMBuildAnd(
LLVMBuilderRef builder = ctx->ac.builder;
LLVMValueRef tmp;
- tmp = LLVMConstInt(ctx->ac.i32, sel->gs_max_out_vertices, false);
+ tmp = LLVMConstInt(ctx->ac.i32, sel->info.base.gs.vertices_out, false);
tmp = LLVMBuildMul(builder, tmp, gsthread, "");
const LLVMValueRef vertexidx = LLVMBuildAdd(builder, tmp, emitidx, "");
return ngg_gs_vertex_ptr(ctx, vertexidx);
*/
const LLVMValueRef can_emit =
LLVMBuildICmp(builder, LLVMIntULT, vertexidx,
- LLVMConstInt(ctx->ac.i32, sel->gs_max_out_vertices, false), "");
+ LLVMConstInt(ctx->ac.i32, sel->info.base.gs.vertices_out, false), "");
tmp = LLVMBuildAdd(builder, vertexidx, ctx->ac.i32_1, "");
tmp = LLVMBuildSelect(builder, can_emit, tmp, vertexidx, "");
/* Determine and store whether this vertex completed a primitive. */
const LLVMValueRef curverts = LLVMBuildLoad(builder, ctx->gs_curprim_verts[stream], "");
- tmp = LLVMConstInt(ctx->ac.i32, u_vertices_per_prim(sel->gs_output_prim) - 1, false);
+ tmp = LLVMConstInt(ctx->ac.i32, u_vertices_per_prim(sel->info.base.gs.output_primitive) - 1, false);
const LLVMValueRef iscompleteprim = LLVMBuildICmp(builder, LLVMIntUGE, curverts, tmp, "");
/* Since the geometry shader emits triangle strips, we need to
* the correct vertex order.
*/
LLVMValueRef is_odd = ctx->ac.i1false;
- if (stream == 0 && u_vertices_per_prim(sel->gs_output_prim) == 3) {
+ if (stream == 0 && u_vertices_per_prim(sel->info.base.gs.output_primitive) == 3) {
tmp = LLVMBuildAnd(builder, curverts, ctx->ac.i32_1, "");
is_odd = LLVMBuildICmp(builder, LLVMIntEQ, tmp, ctx->ac.i32_1, "");
}
{
const struct si_shader_selector *sel = ctx->shader->selector;
const struct si_shader_info *info = &sel->info;
- const unsigned verts_per_prim = u_vertices_per_prim(sel->gs_output_prim);
+ const unsigned verts_per_prim = u_vertices_per_prim(sel->info.base.gs.output_primitive);
LLVMBuilderRef builder = ctx->ac.builder;
LLVMValueRef i8_0 = LLVMConstInt(ctx->ac.i8, 0, false);
LLVMValueRef tmp, tmp2;
const LLVMValueRef vertexidx = LLVMBuildLoad(builder, ctx->gs_next_vertex[stream], "");
tmp = LLVMBuildICmp(builder, LLVMIntUGE, vertexidx,
- LLVMConstInt(ctx->ac.i32, sel->gs_max_out_vertices, false), "");
+ LLVMConstInt(ctx->ac.i32, sel->info.base.gs.vertices_out, false), "");
ac_build_ifcc(&ctx->ac, tmp, 5101);
ac_build_break(&ctx->ac);
ac_build_endif(&ctx->ac, 5101);
unsigned out_idx = 0;
for (unsigned i = 0; i < info->num_outputs; i++) {
- outputs[i].semantic_name = info->output_semantic_name[i];
- outputs[i].semantic_index = info->output_semantic_index[i];
+ outputs[i].semantic = info->output_semantic[i];
for (unsigned j = 0; j < 4; j++, out_idx++) {
tmp = ngg_gs_get_emit_output_ptr(ctx, vertexptr, out_idx);
*max_gsprims = MIN2(*max_gsprims, 1 + max_reuse);
}
+/* Return the size in dwords of the NGG scratch area this shader needs.
+ *
+ * A geometry shader with streamout outputs (sel->so.num_outputs != 0) needs
+ * 44 dwords; every other NGG configuration needs 8 dwords.
+ * NOTE(review): the 44/8 split presumably matches the scratch layout used by
+ * the NGG streamout/alloc-req emit code elsewhere in this file (e.g. the
+ * isgs ? 8 : 4 scratch_offset_base logic) — confirm against those users.
+ */
+unsigned gfx10_ngg_get_scratch_dw_size(struct si_shader *shader)
+{
+   const struct si_shader_selector *sel = shader->selector;
+
+   if (sel->info.stage == MESA_SHADER_GEOMETRY && sel->so.num_outputs)
+      return 44;
+
+   return 8;
+}
+
/**
* Determine subgroup information like maximum number of vertices and prims.
*
const struct si_shader_selector *gs_sel = shader->selector;
const struct si_shader_selector *es_sel =
shader->previous_stage_sel ? shader->previous_stage_sel : gs_sel;
- const enum pipe_shader_type gs_type = gs_sel->type;
- const unsigned gs_num_invocations = MAX2(gs_sel->gs_num_invocations, 1);
+ const gl_shader_stage gs_stage = gs_sel->info.stage;
+ const unsigned gs_num_invocations = MAX2(gs_sel->info.base.gs.invocations, 1);
const unsigned input_prim = si_get_input_prim(gs_sel);
const bool use_adjacency =
input_prim >= PIPE_PRIM_LINES_ADJACENCY && input_prim <= PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY;
const unsigned max_verts_per_prim = u_vertices_per_prim(input_prim);
- const unsigned min_verts_per_prim = gs_type == PIPE_SHADER_GEOMETRY ? max_verts_per_prim : 1;
+ const unsigned min_verts_per_prim = gs_stage == MESA_SHADER_GEOMETRY ? max_verts_per_prim : 1;
/* All these are in dwords: */
- /* We can't allow using the whole LDS, because GS waves compete with
- * other shader stages for LDS space.
- *
- * TODO: We should really take the shader's internal LDS use into
- * account. The linker will fail if the size is greater than
- * 8K dwords.
+ /* GE can only use 8K dwords (32KB) of LDS per workgroup.
*/
- const unsigned max_lds_size = 8 * 1024 - 768;
+ const unsigned max_lds_size = 8 * 1024 - gfx10_ngg_get_scratch_dw_size(shader);
const unsigned target_lds_size = max_lds_size;
unsigned esvert_lds_size = 0;
unsigned gsprim_lds_size = 0;
*/
max_esverts_base = MIN2(max_esverts_base, 251 + max_verts_per_prim - 1);
- if (gs_type == PIPE_SHADER_GEOMETRY) {
+ if (gs_stage == MESA_SHADER_GEOMETRY) {
bool force_multi_cycling = false;
- unsigned max_out_verts_per_gsprim = gs_sel->gs_max_out_vertices * gs_num_invocations;
+ unsigned max_out_verts_per_gsprim = gs_sel->info.base.gs.vertices_out * gs_num_invocations;
retry_select_mode:
if (max_out_verts_per_gsprim <= 256 && !force_multi_cycling) {
* tessellation. */
max_vert_out_per_gs_instance = true;
max_gsprims_base = 1;
- max_out_verts_per_gsprim = gs_sel->gs_max_out_vertices;
+ max_out_verts_per_gsprim = gs_sel->info.base.gs.vertices_out;
}
esvert_lds_size = es_sel->esgs_itemsize / 4;
gsprim_lds_size = (gs_sel->gsvs_vertex_size / 4 + 1) * max_out_verts_per_gsprim;
if (gsprim_lds_size > target_lds_size && !force_multi_cycling) {
- if (gs_sel->tess_turns_off_ngg || es_sel->type != PIPE_SHADER_TESS_EVAL) {
+ if (gs_sel->tess_turns_off_ngg || es_sel->info.stage != MESA_SHADER_TESS_EVAL) {
force_multi_cycling = true;
goto retry_select_mode;
}
unsigned max_out_vertices =
max_vert_out_per_gs_instance
- ? gs_sel->gs_max_out_vertices
- : gs_type == PIPE_SHADER_GEOMETRY
- ? max_gsprims * gs_num_invocations * gs_sel->gs_max_out_vertices
+ ? gs_sel->info.base.gs.vertices_out
+ : gs_stage == MESA_SHADER_GEOMETRY
+ ? max_gsprims * gs_num_invocations * gs_sel->info.base.gs.vertices_out
: max_esverts;
assert(max_out_vertices <= 256);
unsigned prim_amp_factor = 1;
- if (gs_type == PIPE_SHADER_GEOMETRY) {
+ if (gs_stage == MESA_SHADER_GEOMETRY) {
/* Number of output primitives per GS input primitive after
* GS instancing. */
- prim_amp_factor = gs_sel->gs_max_out_vertices;
+ prim_amp_factor = gs_sel->info.base.gs.vertices_out;
}
/* The GE only checks against the maximum number of ES verts after