From f737fe00a047ae1ae9ec4e15a4ea8b578389f2f6 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Wed, 21 Nov 2018 18:06:54 -0500 Subject: [PATCH] winsys/amdgpu: overallocate buffers for faster address translation on Gfx9 Sadly, the 3 games I tested (DeusEx:MD, DiRT Rally, DOTA 2) are unaffected by the overallocation, because I guess their buffers don't fall into the small range below a power-of-two size. Reviewed-by: Bas Nieuwenhuizen --- src/gallium/winsys/amdgpu/drm/amdgpu_bo.c | 24 +++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c index c2e237bb599..36041133d0d 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c @@ -442,6 +442,30 @@ static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws, /* VRAM or GTT must be specified, but not both at the same time. */ assert(util_bitcount(initial_domain & RADEON_DOMAIN_VRAM_GTT) == 1); + /* Gfx9: Overallocate the size to the next power of two for faster address + * translation if we don't waste too much memory. + */ + if (ws->info.chip_class >= GFX9) { + uint64_t next_pot_size = util_next_power_of_two64(size); + + /* For slightly lower than 4 GB allocations, at most 32 MB are wasted. + * For slightly lower than 256 MB allocations, at most 2 MB are wasted. + * For slightly lower than 64 MB allocations, at most 512 KB are wasted. + * + * Waste at most 0.79% (1/127) of the size if we decide to overallocate. + */ + uint64_t max_overalloc = next_pot_size >> 7; + + /* If the next power-of-two size is <= the PTE fragment size, waste up to + * 6.67% (1/15) of the size if we decide to overallocate. + */ + if (next_pot_size <= ws->info.pte_fragment_size) + max_overalloc = next_pot_size >> 4; + + if (size + max_overalloc >= next_pot_size) + size = next_pot_size; + } + bo = CALLOC_STRUCT(amdgpu_winsys_bo); if (!bo) { return NULL; -- 2.30.2