radeonsi: add primitive culling stats to the HUD
author Marek Olšák <marek.olsak@amd.com>
Wed, 20 Feb 2019 04:27:16 +0000 (23:27 -0500)
committer Marek Olšák <marek.olsak@amd.com>
Thu, 16 May 2019 17:13:36 +0000 (13:13 -0400)
Acked-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
src/gallium/drivers/radeonsi/si_pipe.h
src/gallium/drivers/radeonsi/si_query.c
src/gallium/drivers/radeonsi/si_query.h
src/gallium/drivers/radeonsi/si_state_draw.c

index 0d00a9b17b49d12d1beed2a445378e966a76c426..0fd3ec4675591a061d7660ea64be9352167c90c4 100644 (file)
@@ -1105,6 +1105,9 @@ struct si_context {
        unsigned                        num_resident_handles;
        uint64_t                        num_alloc_tex_transfer_bytes;
        unsigned                        last_tex_ps_draw_ratio; /* for query */
+       unsigned                        compute_num_verts_accepted;
+       unsigned                        compute_num_verts_rejected;
+       unsigned                        compute_num_verts_ineligible; /* due to low vertex count */
        unsigned                        context_roll;
 
        /* Queries. */
index 1dd9249d57cfc05d482785f19fb970433d601e87..1d541f194f35623531c0503419350b6b5a5e7213 100644 (file)
@@ -255,6 +255,15 @@ static bool si_query_sw_begin(struct si_context *sctx,
                query->begin_result =
                        p_atomic_read(&sctx->screen->num_shader_cache_hits);
                break;
+       case SI_QUERY_PD_NUM_PRIMS_ACCEPTED:
+               query->begin_result = sctx->compute_num_verts_accepted;
+               break;
+       case SI_QUERY_PD_NUM_PRIMS_REJECTED:
+               query->begin_result = sctx->compute_num_verts_rejected;
+               break;
+       case SI_QUERY_PD_NUM_PRIMS_INELIGIBLE:
+               query->begin_result = sctx->compute_num_verts_ineligible;
+               break;
        case SI_QUERY_GPIN_ASIC_ID:
        case SI_QUERY_GPIN_NUM_SIMD:
        case SI_QUERY_GPIN_NUM_RB:
@@ -420,6 +429,15 @@ static bool si_query_sw_end(struct si_context *sctx,
                query->end_result =
                        p_atomic_read(&sctx->screen->num_shader_cache_hits);
                break;
+       case SI_QUERY_PD_NUM_PRIMS_ACCEPTED:
+               query->end_result = sctx->compute_num_verts_accepted;
+               break;
+       case SI_QUERY_PD_NUM_PRIMS_REJECTED:
+               query->end_result = sctx->compute_num_verts_rejected;
+               break;
+       case SI_QUERY_PD_NUM_PRIMS_INELIGIBLE:
+               query->end_result = sctx->compute_num_verts_ineligible;
+               break;
        case SI_QUERY_GPIN_ASIC_ID:
        case SI_QUERY_GPIN_NUM_SIMD:
        case SI_QUERY_GPIN_NUM_RB:
@@ -465,6 +483,12 @@ static bool si_query_sw_get_result(struct si_context *sctx,
                result->u64 = (query->end_result - query->begin_result) * 100 /
                              (query->end_time - query->begin_time);
                return true;
+       case SI_QUERY_PD_NUM_PRIMS_ACCEPTED:
+       case SI_QUERY_PD_NUM_PRIMS_REJECTED:
+       case SI_QUERY_PD_NUM_PRIMS_INELIGIBLE:
+               result->u64 = ((unsigned)query->end_result -
+                              (unsigned)query->begin_result) / 3;
+               return true;
        case SI_QUERY_GPIN_ASIC_ID:
                result->u32 = 0;
                return true;
@@ -1782,6 +1806,10 @@ static struct pipe_driver_query_info si_driver_query_list[] = {
        X("GPU-surf-sync-busy",         GPU_SURF_SYNC_BUSY,     UINT64, AVERAGE),
        X("GPU-cp-dma-busy",            GPU_CP_DMA_BUSY,        UINT64, AVERAGE),
        X("GPU-scratch-ram-busy",       GPU_SCRATCH_RAM_BUSY,   UINT64, AVERAGE),
+
+       X("pd-num-prims-accepted",      PD_NUM_PRIMS_ACCEPTED,  UINT64, AVERAGE),
+       X("pd-num-prims-rejected",      PD_NUM_PRIMS_REJECTED,  UINT64, AVERAGE),
+       X("pd-num-prims-ineligible",    PD_NUM_PRIMS_INELIGIBLE,UINT64, AVERAGE),
 };
 
 #undef X
index 6ff0a239cb3c35297da77c88e1f6c0db6ff94add..c1918f4df2dcbf00de2535a6b56fa3eaba8ca968 100644 (file)
@@ -112,6 +112,9 @@ enum {
        SI_QUERY_GPIN_NUM_SE,
        SI_QUERY_TIME_ELAPSED_SDMA,
        SI_QUERY_TIME_ELAPSED_SDMA_SI, /* emulated, measured on the CPU */
+       SI_QUERY_PD_NUM_PRIMS_ACCEPTED,
+       SI_QUERY_PD_NUM_PRIMS_REJECTED,
+       SI_QUERY_PD_NUM_PRIMS_INELIGIBLE,
 
        SI_QUERY_FIRST_PERFCOUNTER = PIPE_QUERY_DRIVER_SPECIFIC + 100,
 };
index b4b3fe323ee68d3a9637aa685aca52d1aedd45fa..485efcb0dffee0f055d26a50187eb58416e490e1 100644 (file)
@@ -1591,14 +1591,17 @@ static void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *i
                        }
                }
        } else {
-               direct_count = info->count * instance_count;
+               /* Multiply by 3 for strips and fans to get an approximate vertex
+                * count as triangles. */
+               direct_count = info->count * instance_count *
+                              (prim == PIPE_PRIM_TRIANGLES ? 1 : 3);
        }
 
        /* Determine if we can use the primitive discard compute shader. */
        if (si_compute_prim_discard_enabled(sctx) &&
-           /* Multiply by 3 for strips and fans to get the vertex count as triangles. */
-           direct_count * (prim == PIPE_PRIM_TRIANGLES ? 1 : 3) >
-           sctx->prim_discard_vertex_count_threshold &&
+           (direct_count > sctx->prim_discard_vertex_count_threshold ?
+            (sctx->compute_num_verts_rejected += direct_count, true) : /* Add, then return true. */
+            (sctx->compute_num_verts_ineligible += direct_count, false)) && /* Add, then return false. */
            (!info->count_from_stream_output || pd_msg("draw_opaque")) &&
            (primitive_restart ?
             /* Supported prim types with primitive restart: */
@@ -1648,10 +1651,13 @@ static void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *i
                        index_size = 4;
                        instance_count = 1;
                        primitive_restart = false;
+                       sctx->compute_num_verts_rejected -= direct_count;
+                       sctx->compute_num_verts_accepted += direct_count;
                        break;
                case SI_PRIM_DISCARD_DISABLED:
                        break;
                case SI_PRIM_DISCARD_DRAW_SPLIT:
+                       sctx->compute_num_verts_rejected -= direct_count;
                        goto return_cleanup;
                }
        }