radeonsi: add SI_QUERY_TIME_ELAPSED_SDMA for measuring SDMA performance

author Marek Olšák <marek.olsak@amd.com>

Tue, 24 Jul 2018 17:14:29 +0000 (13:14 -0400)

committer Marek Olšák <marek.olsak@amd.com>

Wed, 29 Aug 2018 19:31:42 +0000 (15:31 -0400)
author Marek Olšák <marek.olsak@amd.com>
Tue, 24 Jul 2018 17:14:29 +0000 (13:14 -0400)
committer Marek Olšák <marek.olsak@amd.com>
Wed, 29 Aug 2018 19:31:42 +0000 (15:31 -0400)
diff --git a/src/amd/common/sid.h b/src/amd/common/sid.h

index d9c4a1a741418a1fe878819b216f312cc2b8f813..d696c01d4ddd6692cdcacaaffe6e8573fb2a00df 100644 (file)
--- a/src/amd/common/sid.h
+++ b/src/amd/common/sid.h
@@ -9140,6 +9140,10 @@
  #define    CIK_SDMA_PACKET_TRAP                    0x6
  #define    CIK_SDMA_PACKET_SEMAPHORE               0x7
  #define    CIK_SDMA_PACKET_CONSTANT_FILL           0xb
+#define    CIK_SDMA_OPCODE_TIMESTAMP               0xd
+#define        SDMA_TS_SUB_OPCODE_SET_LOCAL_TIMESTAMP     0x0
+#define        SDMA_TS_SUB_OPCODE_GET_LOCAL_TIMESTAMP     0x1
+#define        SDMA_TS_SUB_OPCODE_GET_GLOBAL_TIMESTAMP    0x2
  #define    CIK_SDMA_PACKET_SRBM_WRITE              0xe
  #define    CIK_SDMA_COPY_MAX_SIZE                  0x3fffe0
  
diff --git a/src/gallium/drivers/radeonsi/si_dma_cs.c b/src/gallium/drivers/radeonsi/si_dma_cs.c

index 3bb769309e3d8ade29d2b948b3337b0068c443f0..7db9570af3c4437f22026423333531b9219d4763 100644 (file)
--- a/src/gallium/drivers/radeonsi/si_dma_cs.c
+++ b/src/gallium/drivers/radeonsi/si_dma_cs.c
@@ -23,6 +23,7 @@
   */
  
  #include "si_pipe.h"
+#include "sid.h"
  
  static void si_dma_emit_wait_idle(struct si_context *sctx)
  {
@@ -35,6 +36,34 @@ static void si_dma_emit_wait_idle(struct si_context *sctx)
                 radeon_emit(cs, 0xf0000000); /* NOP */
  }
  
+void si_dma_emit_timestamp(struct si_context *sctx, struct r600_resource *dst,
+                          uint64_t offset)
+{
+       struct radeon_cmdbuf *cs = sctx->dma_cs;
+       uint64_t va = dst->gpu_address + offset;
+       /* Queues one SDMA GET_GLOBAL_TIMESTAMP packet on sctx->dma_cs, addressed at va. */
+       if (sctx->chip_class == SI) {
+               unreachable("SI DMA doesn't support the timestamp packet.");
+               return;
+       }
+
+       /* Mark the 8 bytes of the destination starting at offset as valid
+        * (initialized), so that transfer_map knows it should wait for the
+        * GPU when mapping that range. */
+       util_range_add(&dst->valid_buffer_range, offset, offset + 8);
+       /* NOTE(review): si_query.c says GET_GLOBAL_TIMESTAMP needs an offset that is a multiple of 32; only 8 is asserted here - confirm. */
+       assert(va % 8 == 0);
+       /* Reserve 4 dwords: 3 for the packet below; the 4th presumably covers the wait-idle NOP - TODO confirm. */
+       si_need_dma_space(sctx, 4, dst, NULL);
+       si_dma_emit_wait_idle(sctx);
+       /* Packet: header dword, then va followed by va >> 32 (presumably the low and high address dwords). */
+       radeon_emit(cs, CIK_SDMA_PACKET(CIK_SDMA_OPCODE_TIMESTAMP,
+                                       SDMA_TS_SUB_OPCODE_GET_GLOBAL_TIMESTAMP,
+                                       0));
+       radeon_emit(cs, va);
+       radeon_emit(cs, va >> 32);
+}
+
  void si_need_dma_space(struct si_context *ctx, unsigned num_dw,
                        struct r600_resource *dst, struct r600_resource *src)
  {
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h

index 95489f09612eef2ca9394ddb2bc2e544178be201..4c3f13b84e216a0aab4d8202df5b4d60a46cf7f3 100644 (file)
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -1155,6 +1155,8 @@ bool si_replace_shader(unsigned num, struct ac_shader_binary *binary);
  void si_init_dma_functions(struct si_context *sctx);
  
  /* si_dma_cs.c */
+void si_dma_emit_timestamp(struct si_context *sctx, struct r600_resource *dst,
+                          uint64_t offset);
  void si_need_dma_space(struct si_context *ctx, unsigned num_dw,
                        struct r600_resource *dst, struct r600_resource *src);
  void si_flush_dma_cs(struct si_context *ctx, unsigned flags,
diff --git a/src/gallium/drivers/radeonsi/si_query.c b/src/gallium/drivers/radeonsi/si_query.c

index f768b531139ea0a19c8d16efd52eb724671de4fe..93efbd4ef4a1445a2fa9de89a3dd9cee2d3b4dac 100644 (file)
--- a/src/gallium/drivers/radeonsi/si_query.c
+++ b/src/gallium/drivers/radeonsi/si_query.c
@@ -648,6 +648,11 @@ static struct pipe_query *si_query_hw_create(struct si_screen *sscreen,
                 query->result_size += 16; /* for the fence + alignment */
                 query->num_cs_dw_end = 6 + si_gfx_write_fence_dwords(sscreen);
                 break;
+       case SI_QUERY_TIME_ELAPSED_SDMA:
+               /* GET_GLOBAL_TIMESTAMP only works if the offset is a multiple of 32. */
+               query->result_size = 64;
+               query->num_cs_dw_end = 0;
+               break;
         case PIPE_QUERY_TIME_ELAPSED:
                 query->result_size = 24;
                 query->num_cs_dw_end = 8 + si_gfx_write_fence_dwords(sscreen);
@@ -747,6 +752,9 @@ static void si_query_hw_do_emit_start(struct si_context *sctx,
         struct radeon_cmdbuf *cs = sctx->gfx_cs;
  
         switch (query->b.type) {
+       case SI_QUERY_TIME_ELAPSED_SDMA:
+               si_dma_emit_timestamp(sctx, buffer, va - buffer->gpu_address);
+               return;
         case PIPE_QUERY_OCCLUSION_COUNTER:
         case PIPE_QUERY_OCCLUSION_PREDICATE:
         case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
@@ -802,7 +810,8 @@ static void si_query_hw_emit_start(struct si_context *sctx,
         si_update_occlusion_query_state(sctx, query->b.type, 1);
         si_update_prims_generated_query_state(sctx, query->b.type, 1);
  
-       si_need_gfx_cs_space(sctx);
+       if (query->b.type != SI_QUERY_TIME_ELAPSED_SDMA)
+               si_need_gfx_cs_space(sctx);
  
         /* Get a new query buffer if needed. */
         if (query->buffer.results_end + query->result_size > query->buffer.buf->b.b.width0) {
@@ -832,6 +841,9 @@ static void si_query_hw_do_emit_stop(struct si_context *sctx,
         uint64_t fence_va = 0;
  
         switch (query->b.type) {
+       case SI_QUERY_TIME_ELAPSED_SDMA:
+               si_dma_emit_timestamp(sctx, buffer, va + 32 - buffer->gpu_address);
+               return;
         case PIPE_QUERY_OCCLUSION_COUNTER:
         case PIPE_QUERY_OCCLUSION_PREDICATE:
         case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
@@ -1022,7 +1034,8 @@ static struct pipe_query *si_create_query(struct pipe_context *ctx, unsigned que
  
         if (query_type == PIPE_QUERY_TIMESTAMP_DISJOINT ||
             query_type == PIPE_QUERY_GPU_FINISHED ||
-           query_type >= PIPE_QUERY_DRIVER_SPECIFIC)
+           (query_type >= PIPE_QUERY_DRIVER_SPECIFIC &&
+            query_type != SI_QUERY_TIME_ELAPSED_SDMA))
                 return si_query_sw_create(query_type);
  
         return si_query_hw_create(sscreen, query_type, index);
@@ -1238,6 +1251,9 @@ static void si_query_hw_add_result(struct si_screen *sscreen,
         case PIPE_QUERY_TIME_ELAPSED:
                 result->u64 += si_query_read_result(buffer, 0, 2, false);
                 break;
+       case SI_QUERY_TIME_ELAPSED_SDMA:
+               result->u64 += si_query_read_result(buffer, 0, 32/4, false);
+               break;
         case PIPE_QUERY_TIMESTAMP:
                 result->u64 = *(uint64_t*)buffer;
                 break;
@@ -1382,6 +1398,7 @@ bool si_query_hw_get_result(struct si_context *sctx,
  
         /* Convert the time to expected units. */
         if (rquery->type == PIPE_QUERY_TIME_ELAPSED ||
+           rquery->type == SI_QUERY_TIME_ELAPSED_SDMA ||
             rquery->type == PIPE_QUERY_TIMESTAMP) {
                 result->u64 = (1000000 * result->u64) / sscreen->info.clock_crystal_freq;
         }
diff --git a/src/gallium/drivers/radeonsi/si_query.h b/src/gallium/drivers/radeonsi/si_query.h

index 3f60208e2f88b7525de8ffabc4e43a780e4dedd6..bc3eb397bc559c42e3d5c262cb1895d9aed26abe 100644 (file)
--- a/src/gallium/drivers/radeonsi/si_query.h
+++ b/src/gallium/drivers/radeonsi/si_query.h
@@ -109,6 +109,7 @@ enum {
         SI_QUERY_GPIN_NUM_RB,
         SI_QUERY_GPIN_NUM_SPI,
         SI_QUERY_GPIN_NUM_SE,
+       SI_QUERY_TIME_ELAPSED_SDMA,
  
         SI_QUERY_FIRST_PERFCOUNTER = PIPE_QUERY_DRIVER_SPECIFIC + 100,
  };
author	Marek Olšák <marek.olsak@amd.com>
	Tue, 24 Jul 2018 17:14:29 +0000 (13:14 -0400)
committer	Marek Olšák <marek.olsak@amd.com>
	Wed, 29 Aug 2018 19:31:42 +0000 (15:31 -0400)
src/amd/common/sid.h		patch \| blob \| history
src/gallium/drivers/radeonsi/si_dma_cs.c		patch \| blob \| history
src/gallium/drivers/radeonsi/si_pipe.h		patch \| blob \| history
src/gallium/drivers/radeonsi/si_query.c		patch \| blob \| history
src/gallium/drivers/radeonsi/si_query.h		patch \| blob \| history