radv/winsys: do not count visible VRAM buffers twice in the budget
author Samuel Pitoiset <samuel.pitoiset@gmail.com>
Thu, 30 Apr 2020 16:42:27 +0000 (18:42 +0200)
committer Marge Bot <eric+marge@anholt.net>
Wed, 6 May 2020 06:58:24 +0000 (06:58 +0000)
The VRAM size returned to apps is computed as follows:
vram_size = real_hw_vram_size - visible_vram_size.

Visible VRAM buffers should be counted only in the visible VRAM
counter and not twice. Buffers with the NO_CPU_ACCESS flag are
known to not be mappable, so they are counted in the VRAM counter.

Other buffers, either with the CPU_ACCESS flag or with neither flag
(imported buffers), are counted in the visible VRAM counter because
they are mappable.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4834>

src/amd/vulkan/radv_radeon_winsys.h
src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c

index ef2b4849692b8ad3be65f54dd68b23ab4e3e9444..e5a4ae57d99094da147e43325226837178302007 100644 (file)
@@ -162,7 +162,7 @@ struct radeon_winsys_fence;
 struct radeon_winsys_bo {
        uint64_t va;
        bool is_local;
-       bool vram_cpu_access;
+       bool vram_no_cpu_access;
 };
 struct radv_winsys_sem_counts {
        uint32_t syncobj_count;
index 740d7d23fa634d46f5c161ae72da2c3cab943845..99bd44226aebdd9a256e954cda0571b5de42df1f 100644 (file)
@@ -276,12 +276,16 @@ static void radv_amdgpu_winsys_bo_destroy(struct radeon_winsys_bo *_bo)
                amdgpu_bo_free(bo->bo);
        }
 
-       if (bo->initial_domain & RADEON_DOMAIN_VRAM)
-               p_atomic_add(&ws->allocated_vram,
-                            -align64(bo->size, ws->info.gart_page_size));
-       if (bo->base.vram_cpu_access)
-               p_atomic_add(&ws->allocated_vram_vis,
-                            -align64(bo->size, ws->info.gart_page_size));
+       if (bo->initial_domain & RADEON_DOMAIN_VRAM) {
+               if (bo->base.vram_no_cpu_access) {
+                       p_atomic_add(&ws->allocated_vram,
+                                    -align64(bo->size, ws->info.gart_page_size));
+               } else {
+                       p_atomic_add(&ws->allocated_vram_vis,
+                                    -align64(bo->size, ws->info.gart_page_size));
+               }
+       }
+
        if (bo->initial_domain & RADEON_DOMAIN_GTT)
                p_atomic_add(&ws->allocated_gtt,
                             -align64(bo->size, ws->info.gart_page_size));
@@ -366,12 +370,12 @@ radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws,
        if (initial_domain & RADEON_DOMAIN_OA)
                request.preferred_heap |= AMDGPU_GEM_DOMAIN_OA;
 
-       if (flags & RADEON_FLAG_CPU_ACCESS) {
-               bo->base.vram_cpu_access = initial_domain & RADEON_DOMAIN_VRAM;
+       if (flags & RADEON_FLAG_CPU_ACCESS)
                request.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
-       }
-       if (flags & RADEON_FLAG_NO_CPU_ACCESS)
+       if (flags & RADEON_FLAG_NO_CPU_ACCESS) {
+               bo->base.vram_no_cpu_access = initial_domain & RADEON_DOMAIN_VRAM;
                request.flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
+       }
        if (flags & RADEON_FLAG_GTT_WC)
                request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC;
        if (!(flags & RADEON_FLAG_IMPLICIT_SYNC) && ws->info.drm_minor >= 22)
@@ -411,12 +415,24 @@ radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws,
        r = amdgpu_bo_export(buf_handle, amdgpu_bo_handle_type_kms, &bo->bo_handle);
        assert(!r);
 
-       if (initial_domain & RADEON_DOMAIN_VRAM)
-               p_atomic_add(&ws->allocated_vram,
-                            align64(bo->size, ws->info.gart_page_size));
-       if (bo->base.vram_cpu_access)
-               p_atomic_add(&ws->allocated_vram_vis,
-                            align64(bo->size, ws->info.gart_page_size));
+       if (initial_domain & RADEON_DOMAIN_VRAM) {
+               /* Buffers allocated in VRAM with the NO_CPU_ACCESS flag
+                * aren't mappable and they are counted as part of the VRAM
+                * counter.
+                *
+                * Otherwise, buffers with the CPU_ACCESS flag or without any
+                * of both (imported buffers) are counted as part of the VRAM
+                * visible counter because they can be mapped.
+                */
+               if (bo->base.vram_no_cpu_access) {
+                       p_atomic_add(&ws->allocated_vram,
+                                    align64(bo->size, ws->info.gart_page_size));
+               } else {
+                       p_atomic_add(&ws->allocated_vram_vis,
+                                    align64(bo->size, ws->info.gart_page_size));
+               }
+       }
+
        if (initial_domain & RADEON_DOMAIN_GTT)
                p_atomic_add(&ws->allocated_gtt,
                             align64(bo->size, ws->info.gart_page_size));