gallium/radeon: add a heuristic for better (S)DMA performance
author Marek Olšák <marek.olsak@amd.com>
Fri, 29 Apr 2016 23:21:22 +0000 (01:21 +0200)
committer Marek Olšák <marek.olsak@amd.com>
Tue, 10 May 2016 15:20:09 +0000 (17:20 +0200)
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
src/gallium/drivers/radeon/r600_pipe_common.c
src/gallium/drivers/radeon/radeon_winsys.h
src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
src/gallium/winsys/radeon/drm/radeon_drm_cs.c

index ce8d76963a992d5a0307b162383a1fc846298c0e..bc7f5f53e5d9cde19656de486df01d7059803ab8 100644 (file)
@@ -176,6 +176,20 @@ void r600_dma_emit_wait_idle(struct r600_common_context *rctx)
        /* done at the end of DMA calls, so increment this. */
        rctx->num_dma_calls++;
 
+       /* IBs using too little memory are limited by the IB submission overhead.
+        * IBs using too much memory are limited by the kernel/TTM overhead.
+        * Too long IBs create CPU-GPU pipeline bubbles and add latency.
+        *
+        * This heuristic makes sure that DMA requests are executed
+        * very soon after the call is made and lowers memory usage.
+        * It improves texture upload performance by keeping the DMA
+        * engine busy while uploads are being submitted.
+        */
+       if (rctx->ws->cs_query_memory_usage(rctx->dma.cs) > 64 * 1024 * 1024) {
+               rctx->dma.flush(rctx, RADEON_FLUSH_ASYNC, NULL);
+               return;
+       }
+
        r600_need_dma_space(rctx, 1, NULL, NULL);
 
        if (cs->cdw == 0) /* empty queue */
index 442a4613e96be77e8495bd86848c47879f161ad7..e73fa14a17e61be1b763591f16b40215f30dd0a2 100644 (file)
@@ -683,6 +683,8 @@ struct radeon_winsys {
      */
     boolean (*cs_memory_below_limit)(struct radeon_winsys_cs *cs, uint64_t vram, uint64_t gtt);
 
+    uint64_t (*cs_query_memory_usage)(struct radeon_winsys_cs *cs);
+
     /**
      * Return the buffer list.
      *
index 9534b972a469f147a139491b4bf7abbf25c59d0f..a5d703347c75f38c1e4b1f323548cd30bf6ce5f4 100644 (file)
@@ -563,6 +563,13 @@ static boolean amdgpu_cs_memory_below_limit(struct radeon_winsys_cs *rcs, uint64
    return gtt < ws->info.gart_size * 0.7;
 }
 
+static uint64_t amdgpu_cs_query_memory_usage(struct radeon_winsys_cs *rcs)
+{
+   struct amdgpu_cs *cs = amdgpu_cs(rcs);
+   /* Winsys cs_query_memory_usage hook: returns the sum of the used_vram and used_gart counters kept on this CS. */
+   return cs->used_vram + cs->used_gart;
+}
+
 static unsigned amdgpu_cs_get_buffer_list(struct radeon_winsys_cs *rcs,
                                           struct radeon_bo_list_item *list)
 {
@@ -828,6 +835,7 @@ void amdgpu_cs_init_functions(struct amdgpu_winsys *ws)
    ws->base.cs_lookup_buffer = amdgpu_cs_lookup_buffer;
    ws->base.cs_validate = amdgpu_cs_validate;
    ws->base.cs_memory_below_limit = amdgpu_cs_memory_below_limit;
+   ws->base.cs_query_memory_usage = amdgpu_cs_query_memory_usage;
    ws->base.cs_get_buffer_list = amdgpu_cs_get_buffer_list;
    ws->base.cs_flush = amdgpu_cs_flush;
    ws->base.cs_is_buffer_referenced = amdgpu_bo_is_referenced;
index 7a901a175f87b0032b03978b7d89faef207f79c9..9ac50816f8619e96a4bd605a461deaa64846f5cd 100644 (file)
@@ -398,6 +398,13 @@ static boolean radeon_drm_cs_memory_below_limit(struct radeon_winsys_cs *rcs, ui
     return gtt < cs->ws->info.gart_size * 0.7;
 }
 
+static uint64_t radeon_drm_cs_query_memory_usage(struct radeon_winsys_cs *rcs)
+{
+   struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
+   /* Winsys cs_query_memory_usage hook: returns the sum of the used_vram and used_gart counters kept on cs->csc. */
+   return cs->csc->used_vram + cs->csc->used_gart;
+}
+
 static unsigned radeon_drm_cs_get_buffer_list(struct radeon_winsys_cs *rcs,
                                               struct radeon_bo_list_item *list)
 {
@@ -671,6 +678,7 @@ void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws)
     ws->base.cs_lookup_buffer = radeon_drm_cs_lookup_buffer;
     ws->base.cs_validate = radeon_drm_cs_validate;
     ws->base.cs_memory_below_limit = radeon_drm_cs_memory_below_limit;
+    ws->base.cs_query_memory_usage = radeon_drm_cs_query_memory_usage;
     ws->base.cs_get_buffer_list = radeon_drm_cs_get_buffer_list;
     ws->base.cs_flush = radeon_drm_cs_flush;
     ws->base.cs_is_buffer_referenced = radeon_bo_is_referenced;