radeonsi/gfx10: jump over the shader query atomic if the queries are disabled
author Marek Olšák <marek.olsak@amd.com>
Fri, 21 Jun 2019 22:38:58 +0000 (18:38 -0400)
committer Marek Olšák <marek.olsak@amd.com>
Wed, 3 Jul 2019 19:51:13 +0000 (15:51 -0400)
Acked-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
src/gallium/drivers/radeonsi/gfx10_query.c
src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
src/gallium/drivers/radeonsi/si_shader.h

index 8584b2af505ebed7f16310f8d4041e60fd835a9f..56ecbd54850e523591286419b5cde23c2e9b5d26 100644 (file)
@@ -180,6 +180,7 @@ success:;
        sbuf.buffer_offset = qbuf->head;
        sbuf.buffer_size = sizeof(struct gfx10_sh_query_buffer_mem);
        si_set_rw_shader_buffer(sctx, GFX10_GS_QUERY_BUF, &sbuf);
        sbuf.buffer_offset = qbuf->head;
        sbuf.buffer_size = sizeof(struct gfx10_sh_query_buffer_mem);
        si_set_rw_shader_buffer(sctx, GFX10_GS_QUERY_BUF, &sbuf);
+       sctx->current_vs_state |= S_VS_STATE_STREAMOUT_QUERY_ENABLED(1);
 
        si_mark_atom_dirty(sctx, &sctx->atoms.s.shader_query);
        return true;
 
        si_mark_atom_dirty(sctx, &sctx->atoms.s.shader_query);
        return true;
@@ -242,6 +243,7 @@ static bool gfx10_sh_query_end(struct si_context *sctx, struct si_query *rquery)
                gfx10_alloc_query_buffer(sctx);
        } else {
                si_set_rw_shader_buffer(sctx, GFX10_GS_QUERY_BUF, NULL);
                gfx10_alloc_query_buffer(sctx);
        } else {
                si_set_rw_shader_buffer(sctx, GFX10_GS_QUERY_BUF, NULL);
+               sctx->current_vs_state &= C_VS_STATE_STREAMOUT_QUERY_ENABLED;
 
                /* If a query_begin is followed by a query_end without a draw
                 * in-between, we need to clear the atom to ensure that the
 
                /* If a query_begin is followed by a query_end without a draw
                 * in-between, we need to clear the atom to ensure that the
index 8fbce10012f5ddb76a0b9c79f8161f767767022e..3713975224599a7f94ad328ad033c374f2019863 100644 (file)
@@ -647,6 +647,9 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi,
        /* Update query buffer */
        /* TODO: this won't catch 96-bit clear_buffer via transform feedback. */
        if (!info->properties[TGSI_PROPERTY_VS_BLIT_SGPRS]) {
        /* Update query buffer */
        /* TODO: this won't catch 96-bit clear_buffer via transform feedback. */
        if (!info->properties[TGSI_PROPERTY_VS_BLIT_SGPRS]) {
+               tmp = si_unpack_param(ctx, ctx->param_vs_state_bits, 6, 1);
+               tmp = LLVMBuildTrunc(builder, tmp, ctx->i1, "");
+               ac_build_ifcc(&ctx->ac, tmp, 5029); /* if (STREAMOUT_QUERY_ENABLED) */
                tmp = LLVMBuildICmp(builder, LLVMIntEQ, get_wave_id_in_tg(ctx), ctx->ac.i32_0, "");
                ac_build_ifcc(&ctx->ac, tmp, 5030);
                tmp = LLVMBuildICmp(builder, LLVMIntULE, ac_get_thread_id(&ctx->ac),
                tmp = LLVMBuildICmp(builder, LLVMIntEQ, get_wave_id_in_tg(ctx), ctx->ac.i32_0, "");
                ac_build_ifcc(&ctx->ac, tmp, 5030);
                tmp = LLVMBuildICmp(builder, LLVMIntULE, ac_get_thread_id(&ctx->ac),
@@ -673,6 +676,7 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi,
                }
                ac_build_endif(&ctx->ac, 5031);
                ac_build_endif(&ctx->ac, 5030);
                }
                ac_build_endif(&ctx->ac, 5031);
                ac_build_endif(&ctx->ac, 5030);
+               ac_build_endif(&ctx->ac, 5029);
        }
 
        /* Export primitive data to the index buffer. Format is:
        }
 
        /* Export primitive data to the index buffer. Format is:
@@ -1044,6 +1048,9 @@ void gfx10_ngg_gs_emit_epilogue(struct si_shader_context *ctx)
        }
 
        /* Write shader query data. */
        }
 
        /* Write shader query data. */
+       tmp = si_unpack_param(ctx, ctx->param_vs_state_bits, 6, 1);
+       tmp = LLVMBuildTrunc(builder, tmp, ctx->i1, "");
+       ac_build_ifcc(&ctx->ac, tmp, 5109); /* if (STREAMOUT_QUERY_ENABLED) */
        unsigned num_query_comps = sel->so.num_outputs ? 8 : 4;
        tmp = LLVMBuildICmp(builder, LLVMIntULT, tid,
                            LLVMConstInt(ctx->i32, num_query_comps, false), "");
        unsigned num_query_comps = sel->so.num_outputs ? 8 : 4;
        tmp = LLVMBuildICmp(builder, LLVMIntULT, tid,
                            LLVMConstInt(ctx->i32, num_query_comps, false), "");
@@ -1072,6 +1079,7 @@ void gfx10_ngg_gs_emit_epilogue(struct si_shader_context *ctx)
                                   ctx->i32, args, 5, 0);
        }
        ac_build_endif(&ctx->ac, 5110);
                                   ctx->i32, args, 5, 0);
        }
        ac_build_endif(&ctx->ac, 5110);
+       ac_build_endif(&ctx->ac, 5109);
 
        /* TODO: culling */
 
 
        /* TODO: culling */
 
index 801895b240cb0d0bd283ec4cf3c264d0f6f2387d..2649a7cd5b7496062e72c7fc6e937040f5971544 100644 (file)
@@ -249,6 +249,8 @@ enum {
 #define C_VS_STATE_OUTPRIM                     0xFFFFFFF3
 #define S_VS_STATE_PROVOKING_VTX_INDEX(x)      (((unsigned)(x) & 0x3) << 4)
 #define C_VS_STATE_PROVOKING_VTX_INDEX         0xFFFFFFCF
 #define C_VS_STATE_OUTPRIM                     0xFFFFFFF3
 #define S_VS_STATE_PROVOKING_VTX_INDEX(x)      (((unsigned)(x) & 0x3) << 4)
 #define C_VS_STATE_PROVOKING_VTX_INDEX         0xFFFFFFCF
+#define S_VS_STATE_STREAMOUT_QUERY_ENABLED(x)  (((unsigned)(x) & 0x1) << 6)
+#define C_VS_STATE_STREAMOUT_QUERY_ENABLED     0xFFFFFFBF
 #define S_VS_STATE_LS_OUT_PATCH_SIZE(x)                (((unsigned)(x) & 0x1FFF) << 8)
 #define C_VS_STATE_LS_OUT_PATCH_SIZE           0xFFE000FF
 #define S_VS_STATE_LS_OUT_VERTEX_SIZE(x)       (((unsigned)(x) & 0xFF) << 24)
 #define S_VS_STATE_LS_OUT_PATCH_SIZE(x)                (((unsigned)(x) & 0x1FFF) << 8)
 #define C_VS_STATE_LS_OUT_PATCH_SIZE           0xFFE000FF
 #define S_VS_STATE_LS_OUT_VERTEX_SIZE(x)       (((unsigned)(x) & 0xFF) << 24)