radeonsi: add HUD queries for counting VS/PS/CS partial flushes
author Marek Olšák <marek.olsak@amd.com>
Tue, 23 Aug 2016 13:17:35 +0000 (15:17 +0200)
committer Marek Olšák <marek.olsak@amd.com>
Mon, 5 Sep 2016 16:01:15 +0000 (18:01 +0200)
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Reviewed-by: Edward O'Callaghan <funfunctor@folklore1984.net>
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
src/gallium/drivers/radeon/r600_pipe_common.h
src/gallium/drivers/radeon/r600_query.c
src/gallium/drivers/radeon/r600_query.h
src/gallium/drivers/radeonsi/si_state_draw.c

index 624dea3dbd7a06935ff0aa02a581571260c6d86d..d821eaa63182c1f8c6830f81fdaf5487b801cee0 100644 (file)
@@ -558,6 +558,9 @@ struct r600_common_context {
        unsigned                        num_compute_calls;
        unsigned                        num_spill_compute_calls;
        unsigned                        num_dma_calls;
+       unsigned                        num_vs_flushes;
+       unsigned                        num_ps_flushes;
+       unsigned                        num_cs_flushes;
        uint64_t                        num_alloc_tex_transfer_bytes;
        unsigned                        last_tex_ps_draw_ratio; /* for query */
 
index 29ad249df68b251ff52177e102fdc3769e4bbe87..2c3d530aefa3997d5fb9382094d4199da802c890 100644 (file)
@@ -90,6 +90,15 @@ static bool r600_query_sw_begin(struct r600_common_context *rctx,
        case R600_QUERY_DMA_CALLS:
                query->begin_result = rctx->num_dma_calls;
                break;
+       case R600_QUERY_NUM_VS_FLUSHES:
+               query->begin_result = rctx->num_vs_flushes;
+               break;
+       case R600_QUERY_NUM_PS_FLUSHES:
+               query->begin_result = rctx->num_ps_flushes;
+               break;
+       case R600_QUERY_NUM_CS_FLUSHES:
+               query->begin_result = rctx->num_cs_flushes;
+               break;
        case R600_QUERY_REQUESTED_VRAM:
        case R600_QUERY_REQUESTED_GTT:
        case R600_QUERY_MAPPED_VRAM:
@@ -158,6 +167,15 @@ static bool r600_query_sw_end(struct r600_common_context *rctx,
        case R600_QUERY_DMA_CALLS:
                query->end_result = rctx->num_dma_calls;
                break;
+       case R600_QUERY_NUM_VS_FLUSHES:
+               query->end_result = rctx->num_vs_flushes;
+               break;
+       case R600_QUERY_NUM_PS_FLUSHES:
+               query->end_result = rctx->num_ps_flushes;
+               break;
+       case R600_QUERY_NUM_CS_FLUSHES:
+               query->end_result = rctx->num_cs_flushes;
+               break;
        case R600_QUERY_REQUESTED_VRAM:
        case R600_QUERY_REQUESTED_GTT:
        case R600_QUERY_MAPPED_VRAM:
@@ -1182,6 +1200,9 @@ static struct pipe_driver_query_info r600_driver_query_list[] = {
        X("compute-calls",              COMPUTE_CALLS,          UINT64, AVERAGE),
        X("spill-compute-calls",        SPILL_COMPUTE_CALLS,    UINT64, AVERAGE),
        X("dma-calls",                  DMA_CALLS,              UINT64, AVERAGE),
+       X("num-vs-flushes",             NUM_VS_FLUSHES,         UINT64, AVERAGE),
+       X("num-ps-flushes",             NUM_PS_FLUSHES,         UINT64, AVERAGE),
+       X("num-cs-flushes",             NUM_CS_FLUSHES,         UINT64, AVERAGE),
        X("requested-VRAM",             REQUESTED_VRAM,         BYTES, AVERAGE),
        X("requested-GTT",              REQUESTED_GTT,          BYTES, AVERAGE),
        X("mapped-VRAM",                MAPPED_VRAM,            BYTES, AVERAGE),
index 89f55161ac8de18ff45b2cc18d3ce2178622e1cc..0cd1a02402826d7f4e1a9800276f80d82baae35f 100644 (file)
@@ -46,6 +46,9 @@ enum {
        R600_QUERY_COMPUTE_CALLS,
        R600_QUERY_SPILL_COMPUTE_CALLS,
        R600_QUERY_DMA_CALLS,
+       R600_QUERY_NUM_VS_FLUSHES,
+       R600_QUERY_NUM_PS_FLUSHES,
+       R600_QUERY_NUM_CS_FLUSHES,
        R600_QUERY_REQUESTED_VRAM,
        R600_QUERY_REQUESTED_GTT,
        R600_QUERY_MAPPED_VRAM,
index 60cc3f05dfaf50ba295d4972fe25aedde01eb6fe..9e50bb2904233625afd1aaa13556e64fec492b9d 100644 (file)
@@ -773,14 +773,22 @@ void si_emit_cache_flush(struct si_context *si_ctx, struct r600_atom *atom)
                if (sctx->flags & SI_CONTEXT_PS_PARTIAL_FLUSH) {
                        radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
                        radeon_emit(cs, EVENT_TYPE(V_028A90_PS_PARTIAL_FLUSH) | EVENT_INDEX(4));
+                       /* Only count explicit shader flushes, not implicit ones
+                        * done by SURFACE_SYNC.
+                        */
+                       sctx->num_vs_flushes++;
+                       sctx->num_ps_flushes++;
                } else if (sctx->flags & SI_CONTEXT_VS_PARTIAL_FLUSH) {
                        radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
                        radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
+                       sctx->num_vs_flushes++;
                }
        }
+
        if (sctx->flags & SI_CONTEXT_CS_PARTIAL_FLUSH) {
                radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
                radeon_emit(cs, EVENT_TYPE(V_028A90_CS_PARTIAL_FLUSH | EVENT_INDEX(4)));
+               sctx->num_cs_flushes++;
        }
 
        /* VGT state synchronization. */