anv: Implement VK_EXT_descriptor_indexing
author: Jason Ekstrand <jason.ekstrand@intel.com>
Wed, 27 Feb 2019 22:08:20 +0000 (16:08 -0600)
committer: Jason Ekstrand <jason@jlekstrand.net>
Fri, 19 Apr 2019 19:56:42 +0000 (19:56 +0000)
Now that everything is in place to do bindless for all resource types
except input attachments and UBOs, VK_EXT_descriptor_indexing is
"trivial".

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Caio Marcelo de Oliveira Filho <caio.oliveira@intel.com>
docs/relnotes/19.1.0.html
src/intel/vulkan/anv_descriptor_set.c
src/intel/vulkan/anv_device.c
src/intel/vulkan/anv_extensions.py
src/intel/vulkan/anv_nir_apply_pipeline_layout.c
src/intel/vulkan/anv_pipeline.c

index 36d6dbe27c32151cef99a44039a59e009168de4d..e42fd5ab3bf167e6cf008b41fb8cfbb4be49b239 100644 (file)
@@ -46,6 +46,7 @@ TBD.
 <li>GL_KHR_parallel_shader_compile on all drivers.</li>
 <li>VK_EXT_buffer_device_address on Intel and RADV.</li>
 <li>VK_NV_compute_shader_derivatives on Intel.</li>
+<li>VK_EXT_descriptor_indexing on Intel.</li>
 </ul>
 
 <h2>Bug fixes</h2>
index c8747caa060c0dca99fc48838591ba86fb31dd4e..1ad89185dd72d0277a797d03965d48d6c589518c 100644 (file)
@@ -208,7 +208,12 @@ anv_descriptor_requires_bindless(const struct anv_physical_device *pdevice,
    if (pdevice->always_use_bindless)
       return anv_descriptor_supports_bindless(pdevice, binding, sampler);
 
-   return false;
+   static const VkDescriptorBindingFlagBitsEXT flags_requiring_bindless =
+      VK_DESCRIPTOR_BINDING_UPDATE_AFTER_BIND_BIT_EXT |
+      VK_DESCRIPTOR_BINDING_UPDATE_UNUSED_WHILE_PENDING_BIT_EXT |
+      VK_DESCRIPTOR_BINDING_PARTIALLY_BOUND_BIT_EXT;
+
+   return (binding->flags & flags_requiring_bindless) != 0;
 }
 
 void anv_GetDescriptorSetLayoutSupport(
index e7793f1170fdcedc22fc9e724d6206aaf60d4454..e7c0212f2e9fe0a7bbaf46806067067b5d4c9beb 100644 (file)
@@ -1031,11 +1031,37 @@ void anv_GetPhysicalDeviceFeatures2(
          break;
       }
 
+      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES_EXT: {
+         VkPhysicalDeviceDescriptorIndexingFeaturesEXT *features =
+            (VkPhysicalDeviceDescriptorIndexingFeaturesEXT *)ext;
+         features->shaderInputAttachmentArrayDynamicIndexing = false;
+         features->shaderUniformTexelBufferArrayDynamicIndexing = true;
+         features->shaderStorageTexelBufferArrayDynamicIndexing = true;
+         features->shaderUniformBufferArrayNonUniformIndexing = false;
+         features->shaderSampledImageArrayNonUniformIndexing = true;
+         features->shaderStorageBufferArrayNonUniformIndexing = true;
+         features->shaderStorageImageArrayNonUniformIndexing = true;
+         features->shaderInputAttachmentArrayNonUniformIndexing = false;
+         features->shaderUniformTexelBufferArrayNonUniformIndexing = true;
+         features->shaderStorageTexelBufferArrayNonUniformIndexing = true;
+         features->descriptorBindingUniformBufferUpdateAfterBind = false;
+         features->descriptorBindingSampledImageUpdateAfterBind = true;
+         features->descriptorBindingStorageImageUpdateAfterBind = true;
+         features->descriptorBindingStorageBufferUpdateAfterBind = true;
+         features->descriptorBindingUniformTexelBufferUpdateAfterBind = true;
+         features->descriptorBindingStorageTexelBufferUpdateAfterBind = true;
+         features->descriptorBindingUpdateUnusedWhilePending = true;
+         features->descriptorBindingPartiallyBound = true;
+         features->descriptorBindingVariableDescriptorCount = false;
+         features->runtimeDescriptorArray = true;
+         break;
+      }
+
       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_FEATURES_EXT: {
          VkPhysicalDeviceInlineUniformBlockFeaturesEXT *features =
             (VkPhysicalDeviceInlineUniformBlockFeaturesEXT *)ext;
          features->inlineUniformBlock = true;
-         features->descriptorBindingInlineUniformBlockUpdateAfterBind = false;
+         features->descriptorBindingInlineUniformBlockUpdateAfterBind = true;
          break;
       }
 
@@ -1316,6 +1342,49 @@ void anv_GetPhysicalDeviceProperties2(
          break;
       }
 
+      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_PROPERTIES_EXT: {
+         VkPhysicalDeviceDescriptorIndexingPropertiesEXT *props =
+            (VkPhysicalDeviceDescriptorIndexingPropertiesEXT *)ext;
+
+         /* It's a bit hard to exactly map our implementation to the limits
+          * described here.  The bindless surface handle in the extended
+          * message descriptors is 20 bits and it's an index into the table of
+          * RENDER_SURFACE_STATE structs that starts at bindless surface base
+          * address.  Given that most things consume two surface states per
+          * view (general/sampled for textures and write-only/read-write for
+          * images), we claim 2^19 things.
+          *
+          * For SSBOs, we just use A64 messages so there is no real limit
+          * there beyond the limit on the total size of a descriptor set.
+          */
+         const unsigned max_bindless_views = 1 << 19;
+
+         props->maxUpdateAfterBindDescriptorsInAllPools = max_bindless_views;
+         props->shaderUniformBufferArrayNonUniformIndexingNative = false;
+         props->shaderSampledImageArrayNonUniformIndexingNative = false;
+         props->shaderStorageBufferArrayNonUniformIndexingNative = true;
+         props->shaderStorageImageArrayNonUniformIndexingNative = false;
+         props->shaderInputAttachmentArrayNonUniformIndexingNative = false;
+         props->robustBufferAccessUpdateAfterBind = true;
+         props->quadDivergentImplicitLod = false;
+         props->maxPerStageDescriptorUpdateAfterBindSamplers = max_bindless_views;
+         props->maxPerStageDescriptorUpdateAfterBindUniformBuffers = 0;
+         props->maxPerStageDescriptorUpdateAfterBindStorageBuffers = UINT32_MAX;
+         props->maxPerStageDescriptorUpdateAfterBindSampledImages = max_bindless_views;
+         props->maxPerStageDescriptorUpdateAfterBindStorageImages = max_bindless_views;
+         props->maxPerStageDescriptorUpdateAfterBindInputAttachments = 0;
+         props->maxPerStageUpdateAfterBindResources = UINT32_MAX;
+         props->maxDescriptorSetUpdateAfterBindSamplers = max_bindless_views;
+         props->maxDescriptorSetUpdateAfterBindUniformBuffers = 0;
+         props->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = 0;
+         props->maxDescriptorSetUpdateAfterBindStorageBuffers = UINT32_MAX;
+         props->maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2;
+         props->maxDescriptorSetUpdateAfterBindSampledImages = max_bindless_views;
+         props->maxDescriptorSetUpdateAfterBindStorageImages = max_bindless_views;
+         props->maxDescriptorSetUpdateAfterBindInputAttachments = 0;
+         break;
+      }
+
       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES_KHR: {
          VkPhysicalDeviceDriverPropertiesKHR *driver_props =
             (VkPhysicalDeviceDriverPropertiesKHR *) ext;
index d937e4e4524210ca912f607cc15cceeb3380c9ab..afd327eb4a50092cf5591b79c5a39b22e7bb0f55 100644 (file)
@@ -122,6 +122,8 @@ EXTENSIONS = [
     Extension('VK_EXT_conditional_rendering',             1, 'device->info.gen >= 8 || device->info.is_haswell'),
     Extension('VK_EXT_debug_report',                      8, True),
     Extension('VK_EXT_depth_clip_enable',                 1, True),
+    Extension('VK_EXT_descriptor_indexing',               2,
+              'device->has_a64_buffer_access && device->has_bindless_images'),
     Extension('VK_EXT_direct_mode_display',               1, 'VK_USE_PLATFORM_DISPLAY_KHR'),
     Extension('VK_EXT_display_control',                   1, 'VK_USE_PLATFORM_DISPLAY_KHR'),
     Extension('VK_EXT_display_surface_counter',           1, 'VK_USE_PLATFORM_DISPLAY_KHR'),
index 846964b04e789b6ea278332a802f3fd270c93ba5..23b1cb72098d15964bf51e52c9fe0433bcbb24b1 100644 (file)
@@ -268,6 +268,12 @@ try_lower_direct_buffer_intrinsic(nir_intrinsic_instr *intrin, bool is_atomic,
    if (is_atomic && nir_dest_bit_size(intrin->dest) == 64)
       return false;
 
+   /* Normal binding table-based messages can't handle non-uniform access so
+    * we have to fall back to A64.
+    */
+   if (nir_intrinsic_access(intrin) & ACCESS_NON_UNIFORM)
+      return false;
+
    if (!nir_deref_find_descriptor(deref, state))
       return false;
 
index 09abf4e85fd849aedca05212aa060df74de5606e..64d4d93803cc4f4b828347590613404c751e2e24 100644 (file)
@@ -138,6 +138,7 @@ anv_shader_compile_to_nir(struct anv_device *device,
       .lower_workgroup_access_to_offsets = true,
       .caps = {
          .derivative_group = true,
+         .descriptor_array_dynamic_indexing = true,
          .device_group = true,
          .draw_parameters = true,
          .float16 = pdevice->info.gen >= 8,
@@ -152,6 +153,7 @@ anv_shader_compile_to_nir(struct anv_device *device,
          .multiview = true,
          .physical_storage_buffer_address = pdevice->has_a64_buffer_access,
          .post_depth_coverage = pdevice->info.gen >= 9,
+         .runtime_descriptor_array = true,
          .shader_viewport_index_layer = true,
          .stencil_export = pdevice->info.gen >= 9,
          .storage_8bit = pdevice->info.gen >= 8,
@@ -638,6 +640,13 @@ anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
                  ssbo_address_format);
 
       NIR_PASS_V(nir, nir_opt_constant_folding);
+
+      /* We don't support non-uniform UBOs and non-uniform SSBO access is
+       * handled naturally by falling back to A64 messages.
+       */
+      NIR_PASS_V(nir, nir_lower_non_uniform_access,
+                 nir_lower_non_uniform_texture_access |
+                 nir_lower_non_uniform_image_access);
    }
 
    if (nir->info.stage != MESA_SHADER_COMPUTE)