From f457e1b6d5814e51cb9e0ae47e8fd5936139f42f Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Thu, 30 Apr 2020 18:42:27 +0200 Subject: [PATCH] radv/winsys: do not count visible VRAM buffers twice in the budget The VRAM size returned to apps is computed as follows: vram_size = real_hw_vram_size - visible_vram_size. Visible VRAM buffers should be counted only in the visible VRAM counter and not twice. Buffers with the NO_CPU_ACCESS flag are known not to be mappable, so they are counted in the VRAM counter. Other buffers, either with the CPU_ACCESS flag or with neither flag (imported buffers), are counted in the visible VRAM counter because they are mappable. Signed-off-by: Samuel Pitoiset Reviewed-by: Bas Nieuwenhuizen Part-of: --- src/amd/vulkan/radv_radeon_winsys.h | 2 +- src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c | 48 ++++++++++++------- 2 files changed, 33 insertions(+), 17 deletions(-) diff --git a/src/amd/vulkan/radv_radeon_winsys.h b/src/amd/vulkan/radv_radeon_winsys.h index ef2b4849692..e5a4ae57d99 100644 --- a/src/amd/vulkan/radv_radeon_winsys.h +++ b/src/amd/vulkan/radv_radeon_winsys.h @@ -162,7 +162,7 @@ struct radeon_winsys_fence; struct radeon_winsys_bo { uint64_t va; bool is_local; - bool vram_cpu_access; + bool vram_no_cpu_access; }; struct radv_winsys_sem_counts { uint32_t syncobj_count; diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c index 740d7d23fa6..99bd44226ae 100644 --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c @@ -276,12 +276,16 @@ static void radv_amdgpu_winsys_bo_destroy(struct radeon_winsys_bo *_bo) amdgpu_bo_free(bo->bo); } - if (bo->initial_domain & RADEON_DOMAIN_VRAM) - p_atomic_add(&ws->allocated_vram, - -align64(bo->size, ws->info.gart_page_size)); - if (bo->base.vram_cpu_access) - p_atomic_add(&ws->allocated_vram_vis, - -align64(bo->size, ws->info.gart_page_size)); + if (bo->initial_domain & RADEON_DOMAIN_VRAM) { + if 
(bo->base.vram_no_cpu_access) { + p_atomic_add(&ws->allocated_vram, + -align64(bo->size, ws->info.gart_page_size)); + } else { + p_atomic_add(&ws->allocated_vram_vis, + -align64(bo->size, ws->info.gart_page_size)); + } + } + if (bo->initial_domain & RADEON_DOMAIN_GTT) p_atomic_add(&ws->allocated_gtt, -align64(bo->size, ws->info.gart_page_size)); @@ -366,12 +370,12 @@ radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws, if (initial_domain & RADEON_DOMAIN_OA) request.preferred_heap |= AMDGPU_GEM_DOMAIN_OA; - if (flags & RADEON_FLAG_CPU_ACCESS) { - bo->base.vram_cpu_access = initial_domain & RADEON_DOMAIN_VRAM; + if (flags & RADEON_FLAG_CPU_ACCESS) request.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; - } - if (flags & RADEON_FLAG_NO_CPU_ACCESS) + if (flags & RADEON_FLAG_NO_CPU_ACCESS) { + bo->base.vram_no_cpu_access = initial_domain & RADEON_DOMAIN_VRAM; request.flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS; + } if (flags & RADEON_FLAG_GTT_WC) request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC; if (!(flags & RADEON_FLAG_IMPLICIT_SYNC) && ws->info.drm_minor >= 22) @@ -411,12 +415,24 @@ radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws, r = amdgpu_bo_export(buf_handle, amdgpu_bo_handle_type_kms, &bo->bo_handle); assert(!r); - if (initial_domain & RADEON_DOMAIN_VRAM) - p_atomic_add(&ws->allocated_vram, - align64(bo->size, ws->info.gart_page_size)); - if (bo->base.vram_cpu_access) - p_atomic_add(&ws->allocated_vram_vis, - align64(bo->size, ws->info.gart_page_size)); + if (initial_domain & RADEON_DOMAIN_VRAM) { + /* Buffers allocated in VRAM with the NO_CPU_ACCESS flag + * aren't mappable and they are counted as part of the VRAM + * counter. + * + * Otherwise, buffers with the CPU_ACCESS flag or without any + * of both (imported buffers) are counted as part of the VRAM + * visible counter because they can be mapped. 
+ */ + if (bo->base.vram_no_cpu_access) { + p_atomic_add(&ws->allocated_vram, + align64(bo->size, ws->info.gart_page_size)); + } else { + p_atomic_add(&ws->allocated_vram_vis, + align64(bo->size, ws->info.gart_page_size)); + } + } + if (initial_domain & RADEON_DOMAIN_GTT) p_atomic_add(&ws->allocated_gtt, align64(bo->size, ws->info.gart_page_size)); -- 2.30.2