From 214b565bc28bc4419f3eec29ab7bbe34080459fe Mon Sep 17 00:00:00 2001 From: =?utf8?q?Christian=20K=C3=B6nig?= Date: Tue, 29 Aug 2017 16:45:46 +0200 Subject: [PATCH] winsys/amdgpu: set AMDGPU_GEM_CREATE_VM_ALWAYS_VALID if possible v2 MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit When the kernel supports it, set the local flag and stop adding those BOs to the BO list. Can probably be optimized much more. v2: rename new flag to AMDGPU_GEM_CREATE_VM_ALWAYS_VALID Reviewed-by: Marek Olšák --- src/gallium/winsys/amdgpu/drm/amdgpu_bo.c | 8 ++++++++ src/gallium/winsys/amdgpu/drm/amdgpu_bo.h | 2 ++ src/gallium/winsys/amdgpu/drm/amdgpu_cs.c | 22 +++++++++++++++++----- 3 files changed, 27 insertions(+), 5 deletions(-) diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c index 883a7c18e3e..897b4f05965 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c @@ -38,6 +38,10 @@ #include #include +#ifndef AMDGPU_GEM_CREATE_VM_ALWAYS_VALID +#define AMDGPU_GEM_CREATE_VM_ALWAYS_VALID (1 << 6) +#endif + /* Set to 1 for verbose output showing committed sparse buffer ranges.
*/ #define DEBUG_SPARSE_COMMITS 0 @@ -406,6 +410,9 @@ static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws, request.flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS; if (flags & RADEON_FLAG_GTT_WC) request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC; + if (flags & RADEON_FLAG_NO_INTERPROCESS_SHARING && + ws->info.drm_minor >= 20) + request.flags |= AMDGPU_GEM_CREATE_VM_ALWAYS_VALID; r = amdgpu_bo_alloc(ws->dev, &request, &buf_handle); if (r) { @@ -439,6 +446,7 @@ static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws, bo->u.real.va_handle = va_handle; bo->initial_domain = initial_domain; bo->unique_id = __sync_fetch_and_add(&ws->next_bo_unique_id, 1); + bo->is_local = !!(request.flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID); if (initial_domain & RADEON_DOMAIN_VRAM) ws->allocated_vram += align64(size, ws->info.gart_page_size); diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h index 1311344b812..10b095d7a1c 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h @@ -115,6 +115,8 @@ struct amdgpu_winsys_bo { unsigned num_fences; unsigned max_fences; struct pipe_fence_handle **fences; + + bool is_local; }; struct amdgpu_slab { diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c index 5ddde8e7944..f68071abc8f 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c @@ -1121,6 +1121,8 @@ void amdgpu_cs_submit_ib(void *job, int thread_index) free(handles); mtx_unlock(&ws->global_bo_list_lock); } else { + unsigned num_handles; + if (!amdgpu_add_sparse_backing_buffers(cs)) { r = -ENOMEM; goto bo_list_error; @@ -1140,21 +1142,31 @@ void amdgpu_cs_submit_ib(void *job, int thread_index) } } + num_handles = 0; for (i = 0; i < cs->num_real_buffers; ++i) { struct amdgpu_cs_buffer *buffer = &cs->real_buffers[i]; + if (buffer->bo->is_local) + continue; + 
assert(buffer->u.real.priority_usage != 0); - cs->handles[i] = buffer->bo->bo; - cs->flags[i] = (util_last_bit64(buffer->u.real.priority_usage) - 1) / 4; + cs->handles[num_handles] = buffer->bo->bo; + cs->flags[num_handles] = (util_last_bit64(buffer->u.real.priority_usage) - 1) / 4; + ++num_handles; } if (acs->ring_type == RING_GFX) ws->gfx_bo_list_counter += cs->num_real_buffers; - r = amdgpu_bo_list_create(ws->dev, cs->num_real_buffers, - cs->handles, cs->flags, - &cs->request.resources); + if (num_handles) { + r = amdgpu_bo_list_create(ws->dev, num_handles, + cs->handles, cs->flags, + &cs->request.resources); + } else { + r = 0; + cs->request.resources = 0; + } } bo_list_error: -- 2.30.2