From 060a6434eca9fb05ca2dfd612f8abd4786ee4549 Mon Sep 17 00:00:00 2001
From: Jason Ekstrand
Date: Fri, 17 Mar 2017 16:16:06 -0700
Subject: [PATCH] anv: Advertise larger heap sizes

Instead of just advertising the aperture size, we do something more
intelligent. On systems with a full 48-bit PPGTT, we can address 100% of
the available system RAM from the GPU. In order to keep clients from
burning 100% of your available RAM for graphics resources, we have a
nice little heuristic (which has received exactly zero tuning) to keep
things under a reasonable level of control.

Reviewed-by: Kristian H. Kristensen
---
 src/intel/vulkan/anv_device.c  | 61 ++++++++++++++++++++++++++--------
 src/intel/vulkan/anv_gem.c     | 16 +++++++++
 src/intel/vulkan/anv_private.h | 12 ++++++-
 3 files changed, 75 insertions(+), 14 deletions(-)

diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index 22dd5d6bf3d..356b1b5f0d2 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -25,6 +25,7 @@
 #include
 #include
 #include
+#include <sys/sysinfo.h>
 #include
 #include
 #include
@@ -53,6 +54,48 @@ compiler_perf_log(void *data, const char *fmt, ...)
    va_end(args);
 }
 
+static VkResult
+anv_compute_heap_size(int fd, uint64_t *heap_size)
+{
+   uint64_t gtt_size;
+   if (anv_gem_get_context_param(fd, 0, I915_CONTEXT_PARAM_GTT_SIZE,
+                                 &gtt_size) == -1) {
+      /* If, for whatever reason, we can't actually get the GTT size from the
+       * kernel (too old?) fall back to the aperture size.
+       */
+      anv_perf_warn("Failed to get I915_CONTEXT_PARAM_GTT_SIZE: %m");
+
+      if (anv_gem_get_aperture(fd, &gtt_size) == -1) {
+         return vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
+                          "failed to get aperture size: %m");
+      }
+   }
+
+   /* Query the total ram from the system */
+   struct sysinfo info;
+   sysinfo(&info);
+
+   uint64_t total_ram = (uint64_t)info.totalram * (uint64_t)info.mem_unit;
+
+   /* We don't want to burn too much ram with the GPU. If the user has 4GiB
+    * or less, we use at most half. If they have more than 4GiB, we use 3/4.
+    */
+   uint64_t available_ram;
+   if (total_ram <= 4ull * 1024ull * 1024ull * 1024ull)
+      available_ram = total_ram / 2;
+   else
+      available_ram = total_ram * 3 / 4;
+
+   /* We also want to leave some padding for things we allocate in the driver,
+    * so don't go over 3/4 of the GTT either.
+    */
+   uint64_t available_gtt = gtt_size * 3 / 4;
+
+   *heap_size = MIN2(available_ram, available_gtt);
+
+   return VK_SUCCESS;
+}
+
 static bool
 anv_device_get_cache_uuid(void *uuid)
 {
@@ -124,12 +167,6 @@ anv_physical_device_init(struct anv_physical_device *device,
       }
    }
 
-   if (anv_gem_get_aperture(fd, &device->aperture_size) == -1) {
-      result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
-                         "failed to get aperture size: %m");
-      goto fail;
-   }
-
    if (!anv_gem_get_param(fd, I915_PARAM_HAS_WAIT_TIMEOUT)) {
       result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
                          "kernel missing gem wait");
@@ -151,6 +188,10 @@ anv_physical_device_init(struct anv_physical_device *device,
 
    device->supports_48bit_addresses = anv_gem_supports_48b_addresses(fd);
 
+   result = anv_compute_heap_size(fd, &device->heap_size);
+   if (result != VK_SUCCESS)
+      goto fail;
+
    if (!anv_device_get_cache_uuid(device->uuid)) {
       result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
                          "cannot generate UUID");
@@ -735,12 +776,6 @@ void anv_GetPhysicalDeviceMemoryProperties(
     VkPhysicalDeviceMemoryProperties* pMemoryProperties)
 {
    ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
-   VkDeviceSize heap_size;
-
-   /* Reserve some wiggle room for the driver by exposing only 75% of the
-    * aperture to the heap.
-    */
-   heap_size = 3 * physical_device->aperture_size / 4;
 
    if (physical_device->info.has_llc) {
       /* Big core GPUs share LLC with the CPU and thus one memory type can be
@@ -777,7 +812,7 @@ void anv_GetPhysicalDeviceMemoryProperties(
 
    pMemoryProperties->memoryHeapCount = 1;
    pMemoryProperties->memoryHeaps[0] = (VkMemoryHeap) {
-      .size = heap_size,
+      .size = physical_device->heap_size,
       .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
    };
 }
diff --git a/src/intel/vulkan/anv_gem.c b/src/intel/vulkan/anv_gem.c
index 6a996ea3db8..2d07a3dbb0e 100644
--- a/src/intel/vulkan/anv_gem.c
+++ b/src/intel/vulkan/anv_gem.c
@@ -287,6 +287,22 @@ anv_gem_destroy_context(struct anv_device *device, int context)
    return anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY, &destroy);
 }
 
+int
+anv_gem_get_context_param(int fd, int context, uint32_t param, uint64_t *value)
+{
+   struct drm_i915_gem_context_param gp = {
+      .ctx_id = context,
+      .param = param,
+   };
+
+   int ret = anv_ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM, &gp);
+   if (ret == -1)
+      return -1;
+
+   *value = gp.value;
+   return 0;
+}
+
 int
 anv_gem_get_aperture(int fd, uint64_t *size)
 {
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index 03381999152..95666d8f4b6 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -516,7 +516,15 @@ struct anv_physical_device {
     char path[20];
     const char * name;
     struct gen_device_info info;
-    uint64_t aperture_size;
+    /** Amount of "GPU memory" we want to advertise
+     *
+     * Clearly, this value is bogus since Intel is a UMA architecture. On
+     * gen7 platforms, we are limited by GTT size unless we want to implement
+     * fine-grained tracking and GTT splitting. On Broadwell and above we are
+     * practically unlimited. However, we will never report more than 3/4 of
+     * the total system ram to try and avoid running out of RAM.
+     */
+    uint64_t heap_size;
     bool supports_48bit_addresses;
     struct brw_compiler * compiler;
     struct isl_device isl_dev;
@@ -652,6 +660,8 @@ int anv_gem_set_tiling(struct anv_device *device, uint32_t gem_handle,
                        uint32_t stride, uint32_t tiling);
 int anv_gem_create_context(struct anv_device *device);
 int anv_gem_destroy_context(struct anv_device *device, int context);
+int anv_gem_get_context_param(int fd, int context, uint32_t param,
+                              uint64_t *value);
 int anv_gem_get_param(int fd, uint32_t param);
 bool anv_gem_get_bit6_swizzle(int fd, uint32_t tiling);
 int anv_gem_get_aperture(int fd, uint64_t *size);
-- 
2.30.2