+/**
+ * Fill in device->memory: the heaps we advertise and the memory types that
+ * live in each of them.  Also records in device->supports_48bit_addresses
+ * whether 48-bit addressing is usable.
+ *
+ * Heaps:
+ *  - If the size reported by anv_compute_heap_size() is at most 3 GiB, or if
+ *    48-bit addressing is unavailable, a single 32-bit heap is exposed.
+ *  - Otherwise a 48-bit heap (index 0) and a 1 GiB 32-bit heap (index 1) are
+ *    exposed.
+ *
+ * Memory types: one type per heap when device->info.has_llc is set, two
+ * types per heap otherwise.  Types living in a 48-bit heap have the vertex
+ * and index buffer usage bits cleared from valid_buffer_usage.
+ *
+ * \param device  Physical device to initialize; device->info must already be
+ *                filled in (info.gen and info.has_llc are read).
+ * \param fd      File descriptor handed to anv_gem_supports_48b_addresses()
+ *                and anv_compute_heap_size().
+ *
+ * \return VK_SUCCESS, or the error returned by anv_compute_heap_size().
+ */
+static VkResult
+anv_physical_device_init_heaps(struct anv_physical_device *device, int fd)
+{
+   /* The kernel query only tells us whether or not the kernel supports the
+    * EXEC_OBJECT_SUPPORTS_48B_ADDRESS flag and not whether or not the
+    * hardware has actual 48bit address support.
+    */
+   device->supports_48bit_addresses =
+      (device->info.gen >= 8) && anv_gem_supports_48b_addresses(fd);
+
+   uint64_t heap_size;
+   VkResult result = anv_compute_heap_size(fd, &heap_size);
+   if (result != VK_SUCCESS)
+      return result;
+
+   /* Largest total heap size we are willing to serve purely out of the
+    * 32-bit address space.
+    */
+   const uint64_t max_heap_size_32bit = 3ull << 30;
+
+   if (heap_size > max_heap_size_32bit && !device->supports_48bit_addresses) {
+      /* The computed heap is too big for a 32-bit-only heap but we have no
+       * 48-bit support to back a larger one.  Rather than advertising a
+       * 48-bit heap we cannot actually use (or tripping the assert below in
+       * debug builds), clamp to what fits in the 32-bit address space and
+       * fall through to the single-heap case.
+       */
+      heap_size = max_heap_size_32bit;
+   }
+
+   if (heap_size <= max_heap_size_32bit) {
+      /* In this case, everything fits nicely into the 32-bit address space,
+       * so there's no need for supporting 48bit addresses on client-allocated
+       * memory objects.
+       */
+      device->memory.heap_count = 1;
+      device->memory.heaps[0] = (struct anv_memory_heap) {
+         .size = heap_size,
+         .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
+         .supports_48bit_addresses = false,
+      };
+   } else {
+      /* Not everything will fit nicely into a 32-bit address space.  In this
+       * case we need a 64-bit heap.  Advertise a small 32-bit heap and a
+       * larger 48-bit heap.  If we're in this case, then we have a total heap
+       * size larger than 3GiB which most likely means they have 8 GiB of
+       * video memory and so carving off 1 GiB for the 32-bit heap should be
+       * reasonable.
+       */
+      const uint64_t heap_size_32bit = 1ull << 30;
+      const uint64_t heap_size_48bit = heap_size - heap_size_32bit;
+
+      /* Guaranteed by the clamp above. */
+      assert(device->supports_48bit_addresses);
+
+      device->memory.heap_count = 2;
+      device->memory.heaps[0] = (struct anv_memory_heap) {
+         .size = heap_size_48bit,
+         .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
+         .supports_48bit_addresses = true,
+      };
+      device->memory.heaps[1] = (struct anv_memory_heap) {
+         .size = heap_size_32bit,
+         .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
+         .supports_48bit_addresses = false,
+      };
+   }
+
+   uint32_t type_count = 0;
+   for (uint32_t heap = 0; heap < device->memory.heap_count; heap++) {
+      /* Start with every buffer usage allowed and mask bits off below. */
+      uint32_t valid_buffer_usage = ~0;
+
+      /* There appears to be a hardware issue in the VF cache where it only
+       * considers the bottom 32 bits of memory addresses.  If you happen to
+       * have two vertex buffers which get placed exactly 4 GiB apart and use
+       * them in back-to-back draw calls, you can get collisions.  In order to
+       * solve this problem, we require vertex and index buffers be bound to
+       * memory allocated out of the 32-bit heap.
+       */
+      if (device->memory.heaps[heap].supports_48bit_addresses) {
+         valid_buffer_usage &= ~(VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
+                                 VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
+      }
+
+      if (device->info.has_llc) {
+         /* Big core GPUs share LLC with the CPU and thus one memory type can
+          * be both cached and coherent at the same time.
+          */
+         device->memory.types[type_count++] = (struct anv_memory_type) {
+            .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
+                             VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+                             VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
+                             VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
+            .heapIndex = heap,
+            .valid_buffer_usage = valid_buffer_usage,
+         };
+      } else {
+         /* The spec requires that we expose a host-visible, coherent memory
+          * type, but Atom GPUs don't share LLC.  Thus we offer two memory
+          * types to give the application a choice between cached, but not
+          * coherent and coherent but uncached (WC though).
+          */
+         device->memory.types[type_count++] = (struct anv_memory_type) {
+            .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
+                             VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+                             VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
+            .heapIndex = heap,
+            .valid_buffer_usage = valid_buffer_usage,
+         };
+         device->memory.types[type_count++] = (struct anv_memory_type) {
+            .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
+                             VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+                             VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
+            .heapIndex = heap,
+            .valid_buffer_usage = valid_buffer_usage,
+         };
+      }
+   }
+   device->memory.type_count = type_count;
+
+   return VK_SUCCESS;
+}
+