anv: Use an anv_address in anv_buffer
[mesa.git] / src / intel / vulkan / anv_device.c
index bccb669e4f2f05a570616375842a09dfd7e98eb0..19fa1b708f5c5c5bfc7d35c6c5618755504014f3 100644 (file)
@@ -58,23 +58,8 @@ compiler_perf_log(void *data, const char *fmt, ...)
 }
 
 static VkResult
-anv_compute_heap_size(int fd, uint64_t *heap_size)
+anv_compute_heap_size(int fd, uint64_t gtt_size, uint64_t *heap_size)
 {
-   uint64_t gtt_size;
-   if (anv_gem_get_context_param(fd, 0, I915_CONTEXT_PARAM_GTT_SIZE,
-                                 &gtt_size) == -1) {
-      /* If, for whatever reason, we can't actually get the GTT size from the
-       * kernel (too old?) fall back to the aperture size.
-       */
-      anv_perf_warn(NULL, NULL,
-                    "Failed to get I915_CONTEXT_PARAM_GTT_SIZE: %m");
-
-      if (anv_gem_get_aperture(fd, &gtt_size) == -1) {
-         return vk_errorf(NULL, NULL, VK_ERROR_INITIALIZATION_FAILED,
-                          "failed to get aperture size: %m");
-      }
-   }
-
    /* Query the total ram from the system */
    struct sysinfo info;
    sysinfo(&info);
@@ -103,15 +88,26 @@ anv_compute_heap_size(int fd, uint64_t *heap_size)
 static VkResult
 anv_physical_device_init_heaps(struct anv_physical_device *device, int fd)
 {
-   /* The kernel query only tells us whether or not the kernel supports the
-    * EXEC_OBJECT_SUPPORTS_48B_ADDRESS flag and not whether or not the
-    * hardware has actual 48bit address support.
-    */
-   device->supports_48bit_addresses =
-      (device->info.gen >= 8) && anv_gem_supports_48b_addresses(fd);
+   uint64_t gtt_size;
+   if (anv_gem_get_context_param(fd, 0, I915_CONTEXT_PARAM_GTT_SIZE,
+                                 &gtt_size) == -1) {
+      /* If, for whatever reason, we can't actually get the GTT size from the
+       * kernel (too old?) fall back to the aperture size.
+       */
+      anv_perf_warn(NULL, NULL,
+                    "Failed to get I915_CONTEXT_PARAM_GTT_SIZE: %m");
+
+      if (anv_gem_get_aperture(fd, &gtt_size) == -1) {
+         return vk_errorf(NULL, NULL, VK_ERROR_INITIALIZATION_FAILED,
+                          "failed to get aperture size: %m");
+      }
+   }
+
+   device->supports_48bit_addresses = (device->info.gen >= 8) &&
+      gtt_size > (4ULL << 30 /* GiB */);
 
-   uint64_t heap_size;
-   VkResult result = anv_compute_heap_size(fd, &heap_size);
+   uint64_t heap_size = 0;
+   VkResult result = anv_compute_heap_size(fd, gtt_size, &heap_size);
    if (result != VK_SUCCESS)
       return result;
 
@@ -323,6 +319,8 @@ anv_physical_device_init(struct anv_physical_device *device,
       intel_logw("Bay Trail Vulkan support is incomplete");
    } else if (device->info.gen >= 8 && device->info.gen <= 10) {
       /* Gen8-10 fully supported */
+   } else if (device->info.gen == 11) {
+      intel_logw("Vulkan is not yet fully supported on gen11.");
    } else {
       result = vk_errorf(device->instance, device,
                          VK_ERROR_INCOMPATIBLE_DRIVER,
@@ -376,6 +374,9 @@ anv_physical_device_init(struct anv_physical_device *device,
                               anv_gem_supports_syncobj_wait(fd);
    device->has_context_priority = anv_gem_has_context_priority(fd);
 
+   device->use_softpin = anv_gem_get_param(fd, I915_PARAM_HAS_EXEC_SOFTPIN)
+      && device->supports_48bit_addresses;
+
    bool swizzled = anv_gem_get_bit6_swizzle(fd, I915_TILING_X);
 
    /* Starting with Gen10, the timestamp frequency of the command streamer may
@@ -514,41 +515,6 @@ VkResult anv_CreateInstance(
 
    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);
 
-   /* Check if user passed a debug report callback to be used during
-    * Create/Destroy of instance.
-    */
-   const VkDebugReportCallbackCreateInfoEXT *ctor_cb =
-      vk_find_struct_const(pCreateInfo->pNext,
-                           DEBUG_REPORT_CALLBACK_CREATE_INFO_EXT);
-
-   uint32_t client_version;
-   if (pCreateInfo->pApplicationInfo &&
-       pCreateInfo->pApplicationInfo->apiVersion != 0) {
-      client_version = pCreateInfo->pApplicationInfo->apiVersion;
-   } else {
-      client_version = VK_MAKE_VERSION(1, 0, 0);
-   }
-
-   if (VK_MAKE_VERSION(1, 0, 0) > client_version ||
-       client_version > VK_MAKE_VERSION(1, 0, 0xfff)) {
-
-      if (ctor_cb && ctor_cb->flags & VK_DEBUG_REPORT_ERROR_BIT_EXT)
-         ctor_cb->pfnCallback(VK_DEBUG_REPORT_ERROR_BIT_EXT,
-                              VK_DEBUG_REPORT_OBJECT_TYPE_INSTANCE_EXT,
-                              VK_NULL_HANDLE, /* No handle available yet. */
-                              __LINE__,
-                              0,
-                              "anv",
-                              "incompatible driver version",
-                              ctor_cb->pUserData);
-
-      return vk_errorf(NULL, NULL, VK_ERROR_INCOMPATIBLE_DRIVER,
-                       "Client requested version %d.%d.%d",
-                       VK_VERSION_MAJOR(client_version),
-                       VK_VERSION_MINOR(client_version),
-                       VK_VERSION_PATCH(client_version));
-   }
-
    struct anv_instance_extension_table enabled_extensions = {};
    for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
       int idx;
@@ -579,7 +545,13 @@ VkResult anv_CreateInstance(
    else
       instance->alloc = default_alloc;
 
-   instance->apiVersion = client_version;
+   if (pCreateInfo->pApplicationInfo &&
+       pCreateInfo->pApplicationInfo->apiVersion != 0) {
+      instance->apiVersion = pCreateInfo->pApplicationInfo->apiVersion;
+   } else {
+      anv_EnumerateInstanceVersion(&instance->apiVersion);
+   }
+
    instance->enabled_extensions = enabled_extensions;
 
    for (unsigned i = 0; i < ARRAY_SIZE(instance->dispatch.entrypoints); i++) {
@@ -786,11 +758,13 @@ void anv_GetPhysicalDeviceFeatures(
       .shaderStorageImageArrayDynamicIndexing   = true,
       .shaderClipDistance                       = true,
       .shaderCullDistance                       = true,
-      .shaderFloat64                            = pdevice->info.gen >= 8,
-      .shaderInt64                              = pdevice->info.gen >= 8,
-      .shaderInt16                              = false,
+      .shaderFloat64                            = pdevice->info.gen >= 8 &&
+                                                  pdevice->info.has_64bit_types,
+      .shaderInt64                              = pdevice->info.gen >= 8 &&
+                                                  pdevice->info.has_64bit_types,
+      .shaderInt16                              = pdevice->info.gen >= 8,
       .shaderResourceMinLod                     = false,
-      .variableMultisampleRate                  = false,
+      .variableMultisampleRate                  = true,
       .inheritedQueries                         = true,
    };
 
@@ -1064,6 +1038,30 @@ void anv_GetPhysicalDeviceProperties2(
          break;
       }
 
+      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES: {
+         VkPhysicalDeviceSubgroupProperties *properties = (void *)ext;
+
+         properties->subgroupSize = BRW_SUBGROUP_SIZE;
+
+         VkShaderStageFlags scalar_stages = 0;
+         for (unsigned stage = 0; stage < MESA_SHADER_STAGES; stage++) {
+            if (pdevice->compiler->scalar_stage[stage])
+               scalar_stages |= mesa_to_vk_shader_stage(stage);
+         }
+         properties->supportedStages = scalar_stages;
+
+         properties->supportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT |
+                                           VK_SUBGROUP_FEATURE_VOTE_BIT |
+                                           VK_SUBGROUP_FEATURE_ARITHMETIC_BIT |
+                                           VK_SUBGROUP_FEATURE_BALLOT_BIT |
+                                           VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
+                                           VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT |
+                                           VK_SUBGROUP_FEATURE_CLUSTERED_BIT |
+                                           VK_SUBGROUP_FEATURE_QUAD_BIT;
+         properties->quadOperationsInAllStages = VK_TRUE;
+         break;
+      }
+
       default:
          anv_debug_ignored_stype(ext->sType);
          break;
@@ -1184,6 +1182,7 @@ PFN_vkVoidFunction anv_GetInstanceProcAddr(
 
    LOOKUP_ANV_ENTRYPOINT(EnumerateInstanceExtensionProperties);
    LOOKUP_ANV_ENTRYPOINT(EnumerateInstanceLayerProperties);
+   LOOKUP_ANV_ENTRYPOINT(EnumerateInstanceVersion);
    LOOKUP_ANV_ENTRYPOINT(CreateInstance);
 
 #undef LOOKUP_ANV_ENTRYPOINT
@@ -1272,7 +1271,6 @@ anv_queue_init(struct anv_device *device, struct anv_queue *queue)
 {
    queue->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
    queue->device = device;
-   queue->pool = &device->surface_state_pool;
    queue->flags = 0;
 }
 
@@ -1372,6 +1370,9 @@ anv_device_init_dispatch(struct anv_device *device)
 {
    const struct anv_dispatch_table *genX_table;
    switch (device->info.gen) {
+   case 11:
+      genX_table = &gen11_dispatch_table;
+      break;
    case 10:
       genX_table = &gen10_dispatch_table;
       break;
@@ -1424,6 +1425,20 @@ vk_priority_to_gen(int priority)
    }
 }
 
+static void
+anv_device_init_hiz_clear_batch(struct anv_device *device)
+{
+   anv_bo_init_new(&device->hiz_clear_bo, device, 4096);
+   uint32_t *map = anv_gem_mmap(device, device->hiz_clear_bo.gem_handle,
+                                0, 4096, 0);
+
+   union isl_color_value hiz_clear = { .u32 = { 0, } };
+   hiz_clear.f32[0] = ANV_HZ_FC_VAL;
+
+   memcpy(map, hiz_clear.u32, sizeof(hiz_clear.u32));
+   anv_gem_munmap(map, device->hiz_clear_bo.size);
+}
+
 VkResult anv_CreateDevice(
     VkPhysicalDevice                            physicalDevice,
     const VkDeviceCreateInfo*                   pCreateInfo,
@@ -1467,6 +1482,15 @@ VkResult anv_CreateDevice(
       }
    }
 
+   /* Check requested queues and fail if we are requested to create any
+    * queues with flags we don't support.
+    */
+   assert(pCreateInfo->queueCreateInfoCount > 0);
+   for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
+      if (pCreateInfo->pQueueCreateInfos[i].flags != 0)
+         return vk_error(VK_ERROR_INITIALIZATION_FAILED);
+   }
+
    /* Check if client specified queue priority. */
    const VkDeviceQueueGlobalPriorityCreateInfoEXT *queue_priority =
       vk_find_struct_const(pCreateInfo->pQueueCreateInfos[0].pNext,
@@ -1506,6 +1530,27 @@ VkResult anv_CreateDevice(
       goto fail_fd;
    }
 
+   if (physical_device->use_softpin) {
+      if (pthread_mutex_init(&device->vma_mutex, NULL) != 0) {
+         result = vk_error(VK_ERROR_INITIALIZATION_FAILED);
+         goto fail_fd;
+      }
+
+      /* keep the page with address zero out of the allocator */
+      util_vma_heap_init(&device->vma_lo, LOW_HEAP_MIN_ADDRESS, LOW_HEAP_SIZE);
+      device->vma_lo_available =
+         physical_device->memory.heaps[physical_device->memory.heap_count - 1].size;
+
+      /* Leave the last 4GiB out of the high vma range, so that no state base
+       * address + size can overflow 48 bits. For more information see the
+       * comment about Wa32bitGeneralStateOffset in anv_allocator.c
+       */
+      util_vma_heap_init(&device->vma_hi, HIGH_HEAP_MIN_ADDRESS,
+                         HIGH_HEAP_SIZE);
+      device->vma_hi_available = physical_device->memory.heap_count == 1 ? 0 :
+         physical_device->memory.heaps[0].size;
+   }
+
    /* As per spec, the driver implementation may deny requests to acquire
     * a priority above the default priority (MEDIUM) if the caller does not
     * have sufficient privileges. In this scenario VK_ERROR_NOT_PERMITTED_EXT
@@ -1595,6 +1640,9 @@ VkResult anv_CreateDevice(
 
    anv_device_init_trivial_batch(device);
 
+   if (device->info.gen >= 10)
+      anv_device_init_hiz_clear_batch(device);
+
    anv_scratch_pool_init(device, &device->scratch_pool);
 
    anv_queue_init(device, &device->queue);
@@ -1688,6 +1736,8 @@ void anv_DestroyDevice(
    anv_gem_close(device, device->workaround_bo.gem_handle);
 
    anv_gem_close(device, device->trivial_batch_bo.gem_handle);
+   if (device->info.gen >= 10)
+      anv_gem_close(device, device->hiz_clear_bo.gem_handle);
 
    anv_state_pool_finish(&device->surface_state_pool);
    anv_state_pool_finish(&device->instruction_state_pool);
@@ -1861,6 +1911,66 @@ VkResult anv_DeviceWaitIdle(
    return anv_device_submit_simple_batch(device, &batch);
 }
 
+bool
+anv_vma_alloc(struct anv_device *device, struct anv_bo *bo)
+{
+   if (!(bo->flags & EXEC_OBJECT_PINNED))
+      return true;
+
+   pthread_mutex_lock(&device->vma_mutex);
+
+   bo->offset = 0;
+
+   if (bo->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS &&
+       device->vma_hi_available >= bo->size) {
+      uint64_t addr = util_vma_heap_alloc(&device->vma_hi, bo->size, 4096);
+      if (addr) {
+         bo->offset = gen_canonical_address(addr);
+         assert(addr == gen_48b_address(bo->offset));
+         device->vma_hi_available -= bo->size;
+      }
+   }
+
+   if (bo->offset == 0 && device->vma_lo_available >= bo->size) {
+      uint64_t addr = util_vma_heap_alloc(&device->vma_lo, bo->size, 4096);
+      if (addr) {
+         bo->offset = gen_canonical_address(addr);
+         assert(addr == gen_48b_address(bo->offset));
+         device->vma_lo_available -= bo->size;
+      }
+   }
+
+   pthread_mutex_unlock(&device->vma_mutex);
+
+   return bo->offset != 0;
+}
+
+void
+anv_vma_free(struct anv_device *device, struct anv_bo *bo)
+{
+   if (!(bo->flags & EXEC_OBJECT_PINNED))
+      return;
+
+   const uint64_t addr_48b = gen_48b_address(bo->offset);
+
+   pthread_mutex_lock(&device->vma_mutex);
+
+   if (addr_48b >= LOW_HEAP_MIN_ADDRESS &&
+       addr_48b <= LOW_HEAP_MAX_ADDRESS) {
+      util_vma_heap_free(&device->vma_lo, addr_48b, bo->size);
+      device->vma_lo_available += bo->size;
+   } else {
+      assert(addr_48b >= HIGH_HEAP_MIN_ADDRESS &&
+             addr_48b <= HIGH_HEAP_MAX_ADDRESS);
+      util_vma_heap_free(&device->vma_hi, addr_48b, bo->size);
+      device->vma_hi_available += bo->size;
+   }
+
+   pthread_mutex_unlock(&device->vma_mutex);
+
+   bo->offset = 0;
+}
+
 VkResult
 anv_bo_init_new(struct anv_bo *bo, struct anv_device *device, uint64_t size)
 {
@@ -2406,11 +2516,12 @@ anv_bind_buffer_memory(const VkBindBufferMemoryInfo *pBindInfo)
 
    if (mem) {
       assert((buffer->usage & mem->type->valid_buffer_usage) == buffer->usage);
-      buffer->bo = mem->bo;
-      buffer->offset = pBindInfo->memoryOffset;
+      buffer->address = (struct anv_address) {
+         .bo = mem->bo,
+         .offset = pBindInfo->memoryOffset,
+      };
    } else {
-      buffer->bo = NULL;
-      buffer->offset = 0;
+      buffer->address = ANV_NULL_ADDRESS;
    }
 }
 
@@ -2576,8 +2687,7 @@ VkResult anv_CreateBuffer(
 
    buffer->size = pCreateInfo->size;
    buffer->usage = pCreateInfo->usage;
-   buffer->bo = NULL;
-   buffer->offset = 0;
+   buffer->address = ANV_NULL_ADDRESS;
 
    *pBuffer = anv_buffer_to_handle(buffer);
 
@@ -2601,10 +2711,11 @@ void anv_DestroyBuffer(
 void
 anv_fill_buffer_surface_state(struct anv_device *device, struct anv_state state,
                               enum isl_format format,
-                              uint32_t offset, uint32_t range, uint32_t stride)
+                              struct anv_address address,
+                              uint32_t range, uint32_t stride)
 {
    isl_buffer_fill_state(&device->isl_dev, state.map,
-                         .address = offset,
+                         .address = anv_address_physical(address),
                          .mocs = device->default_mocs,
                          .size = range,
                          .format = format,