winsys/amdgpu: set AMDGPU_GEM_CREATE_VM_ALWAYS_VALID if possible v2
authorChristian König <christian.koenig@amd.com>
Tue, 29 Aug 2017 14:45:46 +0000 (16:45 +0200)
committerChristian König <christian.koenig@amd.com>
Thu, 31 Aug 2017 12:55:38 +0000 (14:55 +0200)
When the kernel supports it set the local flag and
stop adding those BOs to the BO list.

Can probably be optimized much more.

v2: rename new flag to AMDGPU_GEM_CREATE_VM_ALWAYS_VALID

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
src/gallium/winsys/amdgpu/drm/amdgpu_bo.h
src/gallium/winsys/amdgpu/drm/amdgpu_cs.c

index 883a7c18e3e8ba988fe42b61c6ffa2bc2bc0158d..897b4f05965a3a21cea6b7f1e39af60b01e73ed7 100644 (file)
 #include <stdio.h>
 #include <inttypes.h>
 
+#ifndef AMDGPU_GEM_CREATE_VM_ALWAYS_VALID
+#define AMDGPU_GEM_CREATE_VM_ALWAYS_VALID (1 << 6)
+#endif
+
 /* Set to 1 for verbose output showing committed sparse buffer ranges. */
 #define DEBUG_SPARSE_COMMITS 0
 
@@ -406,6 +410,9 @@ static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws,
       request.flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
    if (flags & RADEON_FLAG_GTT_WC)
       request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC;
+   if (flags & RADEON_FLAG_NO_INTERPROCESS_SHARING &&
+       ws->info.drm_minor >= 20)
+      request.flags |= AMDGPU_GEM_CREATE_VM_ALWAYS_VALID;
 
    r = amdgpu_bo_alloc(ws->dev, &request, &buf_handle);
    if (r) {
@@ -439,6 +446,7 @@ static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws,
    bo->u.real.va_handle = va_handle;
    bo->initial_domain = initial_domain;
    bo->unique_id = __sync_fetch_and_add(&ws->next_bo_unique_id, 1);
+   bo->is_local = !!(request.flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID);
 
    if (initial_domain & RADEON_DOMAIN_VRAM)
       ws->allocated_vram += align64(size, ws->info.gart_page_size);
index 1311344b8120ef5189bcdf754acc3e67e10cd718..10b095d7a1c58984dd283362e5c7dc09a2d8b973 100644 (file)
@@ -115,6 +115,8 @@ struct amdgpu_winsys_bo {
    unsigned num_fences;
    unsigned max_fences;
    struct pipe_fence_handle **fences;
+
+   bool is_local;
 };
 
 struct amdgpu_slab {
index 5ddde8e7944e742617d86a2a397927654d8d1c0f..f68071abc8f28f42a1cf09389b323be20766904b 100644 (file)
@@ -1121,6 +1121,8 @@ void amdgpu_cs_submit_ib(void *job, int thread_index)
       free(handles);
       mtx_unlock(&ws->global_bo_list_lock);
    } else {
+      unsigned num_handles;
+
       if (!amdgpu_add_sparse_backing_buffers(cs)) {
          r = -ENOMEM;
          goto bo_list_error;
@@ -1140,21 +1142,31 @@ void amdgpu_cs_submit_ib(void *job, int thread_index)
          }
       }
 
+      num_handles = 0;
       for (i = 0; i < cs->num_real_buffers; ++i) {
          struct amdgpu_cs_buffer *buffer = &cs->real_buffers[i];
 
+        if (buffer->bo->is_local)
+            continue;
+
          assert(buffer->u.real.priority_usage != 0);
 
-         cs->handles[i] = buffer->bo->bo;
-         cs->flags[i] = (util_last_bit64(buffer->u.real.priority_usage) - 1) / 4;
+         cs->handles[num_handles] = buffer->bo->bo;
+         cs->flags[num_handles] = (util_last_bit64(buffer->u.real.priority_usage) - 1) / 4;
+        ++num_handles;
       }
 
       if (acs->ring_type == RING_GFX)
          ws->gfx_bo_list_counter += cs->num_real_buffers;
 
-      r = amdgpu_bo_list_create(ws->dev, cs->num_real_buffers,
-                                cs->handles, cs->flags,
-                                &cs->request.resources);
+      if (num_handles) {
+         r = amdgpu_bo_list_create(ws->dev, num_handles,
+                                   cs->handles, cs->flags,
+                                   &cs->request.resources);
+      } else {
+         r = 0;
+        cs->request.resources = 0;
+      }
    }
 bo_list_error: