{
unsigned lds_vertex_size = 0;
+ /* The edgeflag is always stored in the last element that's also
+ * used for padding to reduce LDS bank conflicts. */
if (shader->selector->so.num_outputs)
lds_vertex_size = 4 * shader->selector->info.num_outputs + 1;
+ if (shader->selector->ngg_writes_edgeflag)
+ lds_vertex_size = MAX2(lds_vertex_size, 1);
return lds_vertex_size;
}
LLVMValueRef vertex_ptr = NULL;
- if (sel->so.num_outputs)
+ if (sel->so.num_outputs || sel->ngg_writes_edgeflag)
vertex_ptr = ngg_nogs_vertex_ptr(ctx, get_thread_id_in_tg(ctx));
for (unsigned i = 0; i < info->num_outputs; i++) {
LLVMBuildStore(builder, tmp2, tmp);
}
}
+
+ /* Store the edgeflag in the last element of the vertex (the padding element after the outputs if streamout is enabled, otherwise the only element). */
+ if (info->output_semantic_name[i] == TGSI_SEMANTIC_EDGEFLAG &&
+ sel->ngg_writes_edgeflag) {
+ LLVMValueRef edgeflag = LLVMBuildLoad(builder, addrs[4 * i], "");
+ /* The output is a float, but the hw expects a 1-bit integer. */
+ edgeflag = LLVMBuildFPToUI(ctx->ac.builder, edgeflag, ctx->i32, "");
+ edgeflag = ac_build_umin(&ctx->ac, edgeflag, ctx->i32_1);
+
+ tmp = LLVMConstInt(ctx->i32, ngg_nogs_vertex_size(ctx->shader) - 1, 0);
+ tmp = ac_build_gep0(&ctx->ac, vertex_ptr, tmp);
+ LLVMBuildStore(builder, edgeflag, tmp);
+ }
}
lp_build_endif(&ctx->merged_wrap_if_state);
emitted_prims = nggso.emit[0];
}
+ LLVMValueRef user_edgeflags[3] = {};
+
+ if (sel->ngg_writes_edgeflag) {
+ /* Streamout already inserted the barrier, so don't insert it again. */
+ if (!sel->so.num_outputs)
+ ac_build_s_barrier(&ctx->ac);
+
+ ac_build_ifcc(&ctx->ac, is_gs_thread, 5400);
+ /* Load edge flags from ES threads and store them into VGPRs in GS threads. */
+ for (unsigned i = 0; i < num_vertices; i++) {
+ tmp = ngg_nogs_vertex_ptr(ctx, vtxindex[i]);
+ tmp2 = LLVMConstInt(ctx->i32, ngg_nogs_vertex_size(ctx->shader) - 1, 0);
+ tmp = ac_build_gep0(&ctx->ac, tmp, tmp2);
+ tmp = LLVMBuildLoad(builder, tmp, "");
+ tmp = LLVMBuildTrunc(builder, tmp, ctx->i1, "");
+
+ user_edgeflags[i] = ac_build_alloca_undef(&ctx->ac, ctx->i1, "");
+ LLVMBuildStore(builder, tmp, user_edgeflags[i]);
+ }
+ ac_build_endif(&ctx->ac, 5400);
+ }
+
/* Copy Primitive IDs from GS threads to the LDS address corresponding
* to the ES thread of the provoking vertex.
*/
if (ctx->type == PIPE_SHADER_VERTEX &&
ctx->shader->key.mono.u.vs_export_prim_id) {
- /* Streamout uses LDS. We need to wait for it before we can reuse it. */
- if (sel->so.num_outputs)
+ /* Streamout and edge flags use LDS. Make it idle, so that we can reuse it. */
+ if (sel->so.num_outputs || sel->ngg_writes_edgeflag)
ac_build_s_barrier(&ctx->ac);
ac_build_ifcc(&ctx->ac, is_gs_thread, 5400);
ac_build_endif(&ctx->ac, 5400);
}
- /* TODO: primitive culling */
-
build_sendmsg_gs_alloc_req(ctx, ngg_get_vtx_cnt(ctx), ngg_get_prim_cnt(ctx));
/* Update query buffer */
memcpy(prim.index, vtxindex, sizeof(vtxindex[0]) * 3);
for (unsigned i = 0; i < num_vertices; ++i) {
+ if (ctx->type != PIPE_SHADER_VERTEX) {
+ prim.edgeflag[i] = ctx->i1false;
+ continue;
+ }
+
tmp = LLVMBuildLShr(builder, ctx->abi.gs_invocation_id,
LLVMConstInt(ctx->ac.i32, 8 + i, false), "");
prim.edgeflag[i] = LLVMBuildTrunc(builder, tmp, ctx->ac.i1, "");
+
+ if (sel->ngg_writes_edgeflag) {
+ tmp2 = LLVMBuildLoad(builder, user_edgeflags[i], "");
+ prim.edgeflag[i] = LLVMBuildAnd(builder, prim.edgeflag[i],
+ tmp2, "");
+ }
}
build_export_prim(ctx, &prim);
/* Write the misc vector (point size, edgeflag, layer, viewport). */
if (shader->selector->info.writes_psize ||
- shader->selector->info.writes_edgeflag ||
+ shader->selector->pos_writes_edgeflag ||
shader->selector->info.writes_viewport_index ||
shader->selector->info.writes_layer) {
pos_args[1].enabled_channels = shader->selector->info.writes_psize |
- (shader->selector->info.writes_edgeflag << 1) |
+ (shader->selector->pos_writes_edgeflag << 1) |
(shader->selector->info.writes_layer << 2);
pos_args[1].valid_mask = 0; /* EXEC mask */
if (shader->selector->info.writes_psize)
pos_args[1].out[0] = psize_value;
- if (shader->selector->info.writes_edgeflag) {
+ if (shader->selector->pos_writes_edgeflag) {
/* The output is a float, but the hw expects an integer
* with the first bit containing the edge flag. */
edgeflag_value = LLVMBuildFPToUI(ctx->ac.builder,
S_028B90_EN_MAX_VERT_OUT_PER_GS_INSTANCE(
shader->ngg.max_vert_out_per_gs_instance);
- /* User edge flags are set by the pos exports. If user edge flags are
- * not used, we must use hw-generated edge flags and pass them via
- * the prim export to prevent drawing lines on internal edges of
- * decomposed primitives (such as quads) with polygon mode = lines.
- *
- * TODO: We should combine hw-generated edge flags with user edge
- * flags in the shader.
+ /* Always output hw-generated edge flags and pass them via the prim
+ * export to prevent drawing lines on internal edges of decomposed
+ * primitives (such as quads) with polygon mode = lines. Only VS needs
+ * this.
*/
shader->ctx_reg.ngg.pa_cl_ngg_cntl =
- S_028838_INDEX_BUF_EDGE_FLAG_ENA(gs_type == PIPE_SHADER_VERTEX &&
- !gs_info->writes_edgeflag);
+ S_028838_INDEX_BUF_EDGE_FLAG_ENA(gs_type == PIPE_SHADER_VERTEX);
shader->ge_cntl =
S_03096C_PRIM_GRP_SIZE(shader->ngg.max_gsprims) |
!sel->info.properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION] &&
!sel->so.num_outputs;
+ if (sel->type == PIPE_SHADER_VERTEX &&
+ sel->info.writes_edgeflag) {
+ if (sscreen->info.chip_class >= GFX10)
+ sel->ngg_writes_edgeflag = true;
+ else
+ sel->pos_writes_edgeflag = true;
+ }
+
/* Set which opcode uses which (i,j) pair. */
if (sel->info.uses_persp_opcode_interp_centroid)
sel->info.uses_persp_centroid = true;
/* PA_CL_VS_OUT_CNTL */
bool misc_vec_ena =
- sel->info.writes_psize || sel->info.writes_edgeflag ||
+ sel->info.writes_psize || sel->pos_writes_edgeflag ||
sel->info.writes_layer || sel->info.writes_viewport_index;
sel->pa_cl_vs_out_cntl =
S_02881C_USE_VTX_POINT_SIZE(sel->info.writes_psize) |
- S_02881C_USE_VTX_EDGE_FLAG(sel->info.writes_edgeflag) |
+ S_02881C_USE_VTX_EDGE_FLAG(sel->pos_writes_edgeflag) |
S_02881C_USE_VTX_RENDER_TARGET_INDX(sel->info.writes_layer) |
S_02881C_USE_VTX_VIEWPORT_INDX(sel->info.writes_viewport_index) |
S_02881C_VS_OUT_MISC_VEC_ENA(misc_vec_ena) |