anv: Rework CCS memory handling on TGL-LP
[mesa.git] / src / intel / vulkan / anv_device.c
index 509f92a503486f3619828f195ae635b4e5e4db64..b6f941d669f1956a72a7b44654569e3236a27a87 100644 (file)
@@ -329,9 +329,9 @@ get_available_system_memory()
 }
 
 static VkResult
-anv_physical_device_init(struct anv_physical_device *device,
-                         struct anv_instance *instance,
-                         drmDevicePtr drm_device)
+anv_physical_device_try_create(struct anv_instance *instance,
+                               drmDevicePtr drm_device,
+                               struct anv_physical_device **device_out)
 {
    const char *primary_path = drm_device->nodes[DRM_NODE_PRIMARY];
    const char *path = drm_device->nodes[DRM_NODE_RENDER];
@@ -348,7 +348,7 @@ anv_physical_device_init(struct anv_physical_device *device,
    struct gen_device_info devinfo;
    if (!gen_get_device_info_from_fd(fd, &devinfo)) {
       result = vk_error(VK_ERROR_INCOMPATIBLE_DRIVER);
-      goto fail;
+      goto fail_fd;
    }
 
    const char *device_name = gen_get_device_name(devinfo.chipset_id);
@@ -366,7 +366,15 @@ anv_physical_device_init(struct anv_physical_device *device,
    } else {
       result = vk_errorfi(instance, NULL, VK_ERROR_INCOMPATIBLE_DRIVER,
                           "Vulkan not yet supported on %s", device_name);
-      goto fail;
+      goto fail_fd;
+   }
+
+   struct anv_physical_device *device =
+      vk_alloc(&instance->alloc, sizeof(*device), 8,
+               VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
+   if (device == NULL) {
+      result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+      goto fail_fd;
    }
 
    device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
@@ -395,7 +403,7 @@ anv_physical_device_init(struct anv_physical_device *device,
          result = vk_errorfi(device->instance, NULL,
                              VK_ERROR_INITIALIZATION_FAILED,
                              "failed to get command parser version");
-         goto fail;
+         goto fail_alloc;
       }
    }
 
@@ -403,14 +411,14 @@ anv_physical_device_init(struct anv_physical_device *device,
       result = vk_errorfi(device->instance, NULL,
                           VK_ERROR_INITIALIZATION_FAILED,
                           "kernel missing gem wait");
-      goto fail;
+      goto fail_alloc;
    }
 
    if (!anv_gem_get_param(fd, I915_PARAM_HAS_EXECBUF2)) {
       result = vk_errorfi(device->instance, NULL,
                           VK_ERROR_INITIALIZATION_FAILED,
                           "kernel missing execbuf2");
-      goto fail;
+      goto fail_alloc;
    }
 
    if (!device->info.has_llc &&
@@ -418,7 +426,7 @@ anv_physical_device_init(struct anv_physical_device *device,
       result = vk_errorfi(device->instance, NULL,
                           VK_ERROR_INITIALIZATION_FAILED,
                           "kernel missing wc mmap");
-      goto fail;
+      goto fail_alloc;
    }
 
    device->has_softpin = anv_gem_get_param(fd, I915_PARAM_HAS_EXEC_SOFTPIN);
@@ -432,7 +440,7 @@ anv_physical_device_init(struct anv_physical_device *device,
 
    result = anv_physical_device_init_heaps(device, fd);
    if (result != VK_SUCCESS)
-      goto fail;
+      goto fail_alloc;
 
    device->use_softpin = device->has_softpin &&
                          device->supports_48bit_addresses;
@@ -462,6 +470,8 @@ anv_physical_device_init(struct anv_physical_device *device,
     */
    device->has_bindless_samplers = device->info.gen >= 8;
 
+   device->has_implicit_ccs = device->info.has_aux_map;
+
    device->has_mem_available = get_available_system_memory() != 0;
 
    device->always_flush_cache =
@@ -510,7 +520,7 @@ anv_physical_device_init(struct anv_physical_device *device,
    device->compiler = brw_compiler_create(NULL, &device->info);
    if (device->compiler == NULL) {
       result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
-      goto fail;
+      goto fail_alloc;
    }
    device->compiler->shader_debug_log = compiler_debug_log;
    device->compiler->shader_perf_log = compiler_perf_log;
@@ -538,7 +548,7 @@ anv_physical_device_init(struct anv_physical_device *device,
 
    result = anv_physical_device_init_uuids(device);
    if (result != VK_SUCCESS)
-      goto fail;
+      goto fail_compiler;
 
    anv_physical_device_init_disk_cache(device);
 
@@ -557,11 +567,8 @@ anv_physical_device_init(struct anv_physical_device *device,
    device->master_fd = master_fd;
 
    result = anv_init_wsi(device);
-   if (result != VK_SUCCESS) {
-      ralloc_free(device->compiler);
-      anv_physical_device_free_disk_cache(device);
-      goto fail;
-   }
+   if (result != VK_SUCCESS)
+      goto fail_disk_cache;
 
    device->perf = anv_get_perf(&device->info, fd);
 
@@ -571,9 +578,17 @@ anv_physical_device_init(struct anv_physical_device *device,
 
    device->local_fd = fd;
 
+   *device_out = device;
+
    return VK_SUCCESS;
 
-fail:
+fail_disk_cache:
+   anv_physical_device_free_disk_cache(device);
+fail_compiler:
+   ralloc_free(device->compiler);
+fail_alloc:
+   vk_free(&instance->alloc, device);
+fail_fd:
    close(fd);
    if (master_fd != -1)
       close(master_fd);
@@ -581,7 +596,7 @@ fail:
 }
 
 static void
-anv_physical_device_finish(struct anv_physical_device *device)
+anv_physical_device_destroy(struct anv_physical_device *device)
 {
    anv_finish_wsi(device);
    anv_physical_device_free_disk_cache(device);
@@ -590,6 +605,7 @@ anv_physical_device_finish(struct anv_physical_device *device)
    close(device->local_fd);
    if (device->master_fd >= 0)
       close(device->master_fd);
+   vk_free(&device->instance->alloc, device);
 }
 
 static void *
@@ -738,7 +754,8 @@ VkResult anv_CreateInstance(
       }
    }
 
-   instance->physicalDeviceCount = -1;
+   instance->physical_devices_enumerated = false;
+   list_inithead(&instance->physical_devices);
 
    result = vk_debug_report_instance_init(&instance->debug_report_callbacks);
    if (result != VK_SUCCESS) {
@@ -773,11 +790,9 @@ void anv_DestroyInstance(
    if (!instance)
       return;
 
-   if (instance->physicalDeviceCount > 0) {
-      /* We support at most one physical device. */
-      assert(instance->physicalDeviceCount == 1);
-      anv_physical_device_finish(&instance->physicalDevice);
-   }
+   list_for_each_entry_safe(struct anv_physical_device, pdevice,
+                            &instance->physical_devices, link)
+      anv_physical_device_destroy(pdevice);
 
    vk_free(&instance->alloc, (char *)instance->app_info.app_name);
    vk_free(&instance->alloc, (char *)instance->app_info.engine_name);
@@ -795,51 +810,49 @@ void anv_DestroyInstance(
 }
 
 static VkResult
-anv_enumerate_devices(struct anv_instance *instance)
+anv_enumerate_physical_devices(struct anv_instance *instance)
 {
+   if (instance->physical_devices_enumerated)
+      return VK_SUCCESS;
+
+   instance->physical_devices_enumerated = true;
+
    /* TODO: Check for more devices ? */
    drmDevicePtr devices[8];
-   VkResult result = VK_ERROR_INCOMPATIBLE_DRIVER;
    int max_devices;
 
-   instance->physicalDeviceCount = 0;
-
    max_devices = drmGetDevices2(0, devices, ARRAY_SIZE(devices));
    if (max_devices < 1)
-      return VK_ERROR_INCOMPATIBLE_DRIVER;
+      return VK_SUCCESS;
 
+   VkResult result = VK_SUCCESS;
    for (unsigned i = 0; i < (unsigned)max_devices; i++) {
       if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER &&
           devices[i]->bustype == DRM_BUS_PCI &&
           devices[i]->deviceinfo.pci->vendor_id == 0x8086) {
 
-         result = anv_physical_device_init(&instance->physicalDevice,
-                                           instance, devices[i]);
-         if (result != VK_ERROR_INCOMPATIBLE_DRIVER)
+         struct anv_physical_device *pdevice;
+         result = anv_physical_device_try_create(instance, devices[i],
+                                                 &pdevice);
+         /* Incompatible DRM device, skip. */
+         if (result == VK_ERROR_INCOMPATIBLE_DRIVER) {
+            result = VK_SUCCESS;
+            continue;
+         }
+
+         /* Error creating the physical device, report the error. */
+         if (result != VK_SUCCESS)
             break;
+
+         list_addtail(&pdevice->link, &instance->physical_devices);
       }
    }
    drmFreeDevices(devices, max_devices);
 
-   if (result == VK_SUCCESS)
-      instance->physicalDeviceCount = 1;
-
+   /* If we successfully enumerated any devices, call it success */
    return result;
 }
 
-static VkResult
-anv_instance_ensure_physical_device(struct anv_instance *instance)
-{
-   if (instance->physicalDeviceCount < 0) {
-      VkResult result = anv_enumerate_devices(instance);
-      if (result != VK_SUCCESS &&
-          result != VK_ERROR_INCOMPATIBLE_DRIVER)
-         return result;
-   }
-
-   return VK_SUCCESS;
-}
-
 VkResult anv_EnumeratePhysicalDevices(
     VkInstance                                  _instance,
     uint32_t*                                   pPhysicalDeviceCount,
@@ -848,16 +861,15 @@ VkResult anv_EnumeratePhysicalDevices(
    ANV_FROM_HANDLE(anv_instance, instance, _instance);
    VK_OUTARRAY_MAKE(out, pPhysicalDevices, pPhysicalDeviceCount);
 
-   VkResult result = anv_instance_ensure_physical_device(instance);
+   VkResult result = anv_enumerate_physical_devices(instance);
    if (result != VK_SUCCESS)
       return result;
 
-   if (instance->physicalDeviceCount == 0)
-      return VK_SUCCESS;
-
-   assert(instance->physicalDeviceCount == 1);
-   vk_outarray_append(&out, i) {
-      *i = anv_physical_device_to_handle(&instance->physicalDevice);
+   list_for_each_entry(struct anv_physical_device, pdevice,
+                       &instance->physical_devices, link) {
+      vk_outarray_append(&out, i) {
+         *i = anv_physical_device_to_handle(pdevice);
+      }
    }
 
    return vk_outarray_status(&out);
@@ -872,24 +884,21 @@ VkResult anv_EnumeratePhysicalDeviceGroups(
    VK_OUTARRAY_MAKE(out, pPhysicalDeviceGroupProperties,
                          pPhysicalDeviceGroupCount);
 
-   VkResult result = anv_instance_ensure_physical_device(instance);
+   VkResult result = anv_enumerate_physical_devices(instance);
    if (result != VK_SUCCESS)
       return result;
 
-   if (instance->physicalDeviceCount == 0)
-      return VK_SUCCESS;
-
-   assert(instance->physicalDeviceCount == 1);
+   list_for_each_entry(struct anv_physical_device, pdevice,
+                       &instance->physical_devices, link) {
+      vk_outarray_append(&out, p) {
+         p->physicalDeviceCount = 1;
+         memset(p->physicalDevices, 0, sizeof(p->physicalDevices));
+         p->physicalDevices[0] = anv_physical_device_to_handle(pdevice);
+         p->subsetAllocation = false;
 
-   vk_outarray_append(&out, p) {
-      p->physicalDeviceCount = 1;
-      memset(p->physicalDevices, 0, sizeof(p->physicalDevices));
-      p->physicalDevices[0] =
-         anv_physical_device_to_handle(&instance->physicalDevice);
-      p->subsetAllocation = false;
-
-      vk_foreach_struct(ext, p->pNext)
-         anv_debug_ignored_stype(ext->sType);
+         vk_foreach_struct(ext, p->pNext)
+            anv_debug_ignored_stype(ext->sType);
+      }
    }
 
    return vk_outarray_status(&out);
@@ -942,9 +951,9 @@ void anv_GetPhysicalDeviceFeatures(
       .shaderClipDistance                       = true,
       .shaderCullDistance                       = true,
       .shaderFloat64                            = pdevice->info.gen >= 8 &&
-                                                  pdevice->info.has_64bit_types,
+                                                  pdevice->info.has_64bit_float,
       .shaderInt64                              = pdevice->info.gen >= 8 &&
-                                                  pdevice->info.has_64bit_types,
+                                                  pdevice->info.has_64bit_int,
       .shaderInt16                              = pdevice->info.gen >= 8,
       .shaderResourceMinLod                     = pdevice->info.gen >= 9,
       .variableMultisampleRate                  = true,
@@ -3194,40 +3203,24 @@ VkResult anv_DeviceWaitIdle(
    return anv_queue_submit_simple_batch(&device->queue, NULL);
 }
 
-bool
-anv_vma_alloc(struct anv_device *device, struct anv_bo *bo,
+uint64_t
+anv_vma_alloc(struct anv_device *device,
+              uint64_t size, uint64_t align,
+              enum anv_bo_alloc_flags alloc_flags,
               uint64_t client_address)
 {
-   const struct gen_device_info *devinfo = &device->info;
-   /* Gen12 CCS surface addresses need to be 64K aligned. We have no way of
-    * telling what this allocation is for so pick the largest alignment.
-    */
-   const uint32_t vma_alignment =
-      devinfo->gen >= 12 ? (64 * 1024) : (4 * 1024);
-
-   if (!(bo->flags & EXEC_OBJECT_PINNED)) {
-      assert(!(bo->has_client_visible_address));
-      return true;
-   }
-
    pthread_mutex_lock(&device->vma_mutex);
 
-   bo->offset = 0;
+   uint64_t addr = 0;
 
-   if (bo->has_client_visible_address) {
-      assert(bo->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS);
+   if (alloc_flags & ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS) {
       if (client_address) {
          if (util_vma_heap_alloc_addr(&device->vma_cva,
-                                      client_address, bo->size)) {
-            bo->offset = gen_canonical_address(client_address);
+                                      client_address, size)) {
+            addr = client_address;
          }
       } else {
-         uint64_t addr =
-            util_vma_heap_alloc(&device->vma_cva, bo->size, vma_alignment);
-         if (addr) {
-            bo->offset = gen_canonical_address(addr);
-            assert(addr == gen_48b_address(bo->offset));
-         }
+         addr = util_vma_heap_alloc(&device->vma_cva, size, align);
       }
       /* We don't want to fall back to other heaps */
       goto done;
@@ -3235,54 +3228,39 @@ anv_vma_alloc(struct anv_device *device, struct anv_bo *bo,
 
    assert(client_address == 0);
 
-   if (bo->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) {
-      uint64_t addr =
-         util_vma_heap_alloc(&device->vma_hi, bo->size, vma_alignment);
-      if (addr) {
-         bo->offset = gen_canonical_address(addr);
-         assert(addr == gen_48b_address(bo->offset));
-      }
-   }
+   if (!(alloc_flags & ANV_BO_ALLOC_32BIT_ADDRESS))
+      addr = util_vma_heap_alloc(&device->vma_hi, size, align);
 
-   if (bo->offset == 0) {
-      uint64_t addr =
-         util_vma_heap_alloc(&device->vma_lo, bo->size, vma_alignment);
-      if (addr) {
-         bo->offset = gen_canonical_address(addr);
-         assert(addr == gen_48b_address(bo->offset));
-      }
-   }
+   if (addr == 0)
+      addr = util_vma_heap_alloc(&device->vma_lo, size, align);
 
 done:
    pthread_mutex_unlock(&device->vma_mutex);
 
-   return bo->offset != 0;
+   assert(addr == gen_48b_address(addr));
+   return gen_canonical_address(addr);
 }
 
 void
-anv_vma_free(struct anv_device *device, struct anv_bo *bo)
+anv_vma_free(struct anv_device *device,
+             uint64_t address, uint64_t size)
 {
-   if (!(bo->flags & EXEC_OBJECT_PINNED))
-      return;
-
-   const uint64_t addr_48b = gen_48b_address(bo->offset);
+   const uint64_t addr_48b = gen_48b_address(address);
 
    pthread_mutex_lock(&device->vma_mutex);
 
    if (addr_48b >= LOW_HEAP_MIN_ADDRESS &&
        addr_48b <= LOW_HEAP_MAX_ADDRESS) {
-      util_vma_heap_free(&device->vma_lo, addr_48b, bo->size);
+      util_vma_heap_free(&device->vma_lo, addr_48b, size);
    } else if (addr_48b >= CLIENT_VISIBLE_HEAP_MIN_ADDRESS &&
               addr_48b <= CLIENT_VISIBLE_HEAP_MAX_ADDRESS) {
-      util_vma_heap_free(&device->vma_cva, addr_48b, bo->size);
+      util_vma_heap_free(&device->vma_cva, addr_48b, size);
    } else {
       assert(addr_48b >= HIGH_HEAP_MIN_ADDRESS);
-      util_vma_heap_free(&device->vma_hi, addr_48b, bo->size);
+      util_vma_heap_free(&device->vma_hi, addr_48b, size);
    }
 
    pthread_mutex_unlock(&device->vma_mutex);
-
-   bo->offset = 0;
 }
 
 VkResult anv_AllocateMemory(
@@ -3381,9 +3359,27 @@ VkResult anv_AllocateMemory(
       }
    }
 
+   /* By default, we want all VkDeviceMemory objects to support CCS */
+   if (device->physical->has_implicit_ccs)
+      alloc_flags |= ANV_BO_ALLOC_IMPLICIT_CCS;
+
    if (vk_flags & VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT_KHR)
       alloc_flags |= ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS;
 
+   if ((export_info && export_info->handleTypes) ||
+       (fd_info && fd_info->handleType) ||
+       (host_ptr_info && host_ptr_info->handleType)) {
+      /* Anything imported or exported is EXTERNAL */
+      alloc_flags |= ANV_BO_ALLOC_EXTERNAL;
+
+      /* We can't have implicit CCS on external memory with an AUX-table.
+       * Doing so would require us to sync the aux tables across processes
+       * which is impractical.
+       */
+      if (device->info.has_aux_map)
+         alloc_flags &= ~ANV_BO_ALLOC_IMPLICIT_CCS;
+   }
+
    /* Check if we need to support Android HW buffer export. If so,
     * create AHardwareBuffer and import memory from it.
     */
@@ -3428,9 +3424,6 @@ VkResult anv_AllocateMemory(
       if (result != VK_SUCCESS)
          goto fail;
 
-      VkDeviceSize aligned_alloc_size =
-         align_u64(pAllocateInfo->allocationSize, 4096);
-
       /* For security purposes, we reject importing the bo if it's smaller
        * than the requested allocation size.  This prevents a malicious client
        * from passing a buffer to a trusted client, lying about the size, and
@@ -3487,9 +3480,6 @@ VkResult anv_AllocateMemory(
 
    /* Regular allocate (not importing memory). */
 
-   if (export_info && export_info->handleTypes)
-      alloc_flags |= ANV_BO_ALLOC_EXTERNAL;
-
    result = anv_device_alloc_bo(device, pAllocateInfo->allocationSize,
                                 alloc_flags, client_address, &mem->bo);
    if (result != VK_SUCCESS)