anv: Implement VK_EXT_custom_border_color
authorIván Briano <ivan.briano@intel.com>
Thu, 23 Apr 2020 00:08:22 +0000 (17:08 -0700)
committerMarge Bot <eric+marge@anholt.net>
Wed, 13 May 2020 23:20:50 +0000 (23:20 +0000)
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4898>

src/intel/vulkan/anv_device.c
src/intel/vulkan/anv_extensions.py
src/intel/vulkan/anv_private.h
src/intel/vulkan/genX_state.c

index 993fa06de98a7e17a4d62b97e4fe9a32ca3c2530..ba0b14ac7146bd6d870aff7a48258f2ec672199a 100644 (file)
@@ -1123,6 +1123,14 @@ void anv_GetPhysicalDeviceFeatures2(
          break;
       }
 
          break;
       }
 
+      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT: {
+         VkPhysicalDeviceCustomBorderColorFeaturesEXT *features =
+            (VkPhysicalDeviceCustomBorderColorFeaturesEXT *)ext;
+         features->customBorderColors = pdevice->info.gen >= 8;
+         features->customBorderColorWithoutFormat = pdevice->info.gen >= 8;
+         break;
+      }
+
       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_ENABLE_FEATURES_EXT: {
          VkPhysicalDeviceDepthClipEnableFeaturesEXT *features =
             (VkPhysicalDeviceDepthClipEnableFeaturesEXT *)ext;
       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_ENABLE_FEATURES_EXT: {
          VkPhysicalDeviceDepthClipEnableFeaturesEXT *features =
             (VkPhysicalDeviceDepthClipEnableFeaturesEXT *)ext;
@@ -1401,6 +1409,8 @@ void anv_GetPhysicalDeviceFeatures2(
 #define MAX_PER_STAGE_DESCRIPTOR_INPUT_ATTACHMENTS 64
 #define MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS       256
 
 #define MAX_PER_STAGE_DESCRIPTOR_INPUT_ATTACHMENTS 64
 #define MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS       256
 
+#define MAX_CUSTOM_BORDER_COLORS                   4096
+
 void anv_GetPhysicalDeviceProperties(
     VkPhysicalDevice                            physicalDevice,
     VkPhysicalDeviceProperties*                 pProperties)
 void anv_GetPhysicalDeviceProperties(
     VkPhysicalDevice                            physicalDevice,
     VkPhysicalDeviceProperties*                 pProperties)
@@ -1756,6 +1766,13 @@ void anv_GetPhysicalDeviceProperties2(
 
    vk_foreach_struct(ext, pProperties->pNext) {
       switch (ext->sType) {
 
    vk_foreach_struct(ext, pProperties->pNext) {
       switch (ext->sType) {
+      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_PROPERTIES_EXT: {
+         VkPhysicalDeviceCustomBorderColorPropertiesEXT *properties =
+            (VkPhysicalDeviceCustomBorderColorPropertiesEXT *)ext;
+         properties->maxCustomBorderColorSamplers = MAX_CUSTOM_BORDER_COLORS;
+         break;
+      }
+
       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_STENCIL_RESOLVE_PROPERTIES_KHR: {
          VkPhysicalDeviceDepthStencilResolvePropertiesKHR *properties =
             (VkPhysicalDeviceDepthStencilResolvePropertiesKHR *)ext;
       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_STENCIL_RESOLVE_PROPERTIES_KHR: {
          VkPhysicalDeviceDepthStencilResolvePropertiesKHR *properties =
             (VkPhysicalDeviceDepthStencilResolvePropertiesKHR *)ext;
@@ -2359,34 +2376,6 @@ anv_state_pool_emit_data(struct anv_state_pool *pool, size_t size, size_t align,
    return state;
 }
 
    return state;
 }
 
-/* Haswell border color is a bit of a disaster.  Float and unorm formats use a
- * straightforward 32-bit float color in the first 64 bytes.  Instead of using
- * a nice float/integer union like Gen8+, Haswell specifies the integer border
- * color as a separate entry /after/ the float color.  The layout of this entry
- * also depends on the format's bpp (with extra hacks for RG32), and overlaps.
- *
- * Since we don't know the format/bpp, we can't make any of the border colors
- * containing '1' work for all formats, as it would be in the wrong place for
- * some of them.  We opt to make 32-bit integers work as this seems like the
- * most common option.  Fortunately, transparent black works regardless, as
- * all zeroes is the same in every bit-size.
- */
-struct hsw_border_color {
-   float float32[4];
-   uint32_t _pad0[12];
-   uint32_t uint32[4];
-   uint32_t _pad1[108];
-};
-
-struct gen8_border_color {
-   union {
-      float float32[4];
-      uint32_t uint32[4];
-   };
-   /* Pad out to 64 bytes */
-   uint32_t _pad[12];
-};
-
 static void
 anv_device_init_border_colors(struct anv_device *device)
 {
 static void
 anv_device_init_border_colors(struct anv_device *device)
 {
@@ -2877,6 +2866,19 @@ VkResult anv_CreateDevice(
    if (result != VK_SUCCESS)
       goto fail_batch_bo_pool;
 
    if (result != VK_SUCCESS)
       goto fail_batch_bo_pool;
 
+   if (device->info.gen >= 8) {
+      /* The border color pointer is limited to 24 bits, so we need to make
+       * sure that any such color used at any point in the program doesn't
+       * exceed that limit.
+       * We achieve that by reserving all the custom border colors we support
+       * right off the bat, so they are close to the base address.
+       */
+      anv_state_reserved_pool_init(&device->custom_border_colors,
+                                   &device->dynamic_state_pool,
+                                   sizeof(struct gen8_border_color),
+                                   MAX_CUSTOM_BORDER_COLORS, 64);
+   }
+
    result = anv_state_pool_init(&device->instruction_state_pool, device,
                                 INSTRUCTION_STATE_POOL_MIN_ADDRESS, 0, 16384);
    if (result != VK_SUCCESS)
    result = anv_state_pool_init(&device->instruction_state_pool, device,
                                 INSTRUCTION_STATE_POOL_MIN_ADDRESS, 0, 16384);
    if (result != VK_SUCCESS)
@@ -2997,6 +2999,8 @@ VkResult anv_CreateDevice(
  fail_instruction_state_pool:
    anv_state_pool_finish(&device->instruction_state_pool);
  fail_dynamic_state_pool:
  fail_instruction_state_pool:
    anv_state_pool_finish(&device->instruction_state_pool);
  fail_dynamic_state_pool:
+   if (device->info.gen >= 8)
+      anv_state_reserved_pool_finish(&device->custom_border_colors);
    anv_state_pool_finish(&device->dynamic_state_pool);
  fail_batch_bo_pool:
    anv_bo_pool_finish(&device->batch_bo_pool);
    anv_state_pool_finish(&device->dynamic_state_pool);
  fail_batch_bo_pool:
    anv_bo_pool_finish(&device->batch_bo_pool);
@@ -3042,6 +3046,8 @@ void anv_DestroyDevice(
    /* We only need to free these to prevent valgrind errors.  The backing
     * BO will go away in a couple of lines so we don't actually leak.
     */
    /* We only need to free these to prevent valgrind errors.  The backing
     * BO will go away in a couple of lines so we don't actually leak.
     */
+   if (device->info.gen >= 8)
+      anv_state_reserved_pool_finish(&device->custom_border_colors);
    anv_state_pool_free(&device->dynamic_state_pool, device->border_colors);
    anv_state_pool_free(&device->dynamic_state_pool, device->slice_hash);
 #endif
    anv_state_pool_free(&device->dynamic_state_pool, device->border_colors);
    anv_state_pool_free(&device->dynamic_state_pool, device->slice_hash);
 #endif
@@ -4274,6 +4280,11 @@ void anv_DestroySampler(
                           sampler->bindless_state);
    }
 
                           sampler->bindless_state);
    }
 
+   if (sampler->custom_border_color.map) {
+      anv_state_reserved_pool_free(&device->custom_border_colors,
+                                   sampler->custom_border_color);
+   }
+
    vk_object_base_finish(&sampler->base);
    vk_free2(&device->vk.alloc, pAllocator, sampler);
 }
    vk_object_base_finish(&sampler->base);
    vk_free2(&device->vk.alloc, pAllocator, sampler);
 }
index 67fcba4c32cc6f921f91402462b6acff544e501e..926061bb9978b89f0b19905ae366b4696c7d4a40 100644 (file)
@@ -118,6 +118,7 @@ EXTENSIONS = [
     Extension('VK_EXT_buffer_device_address',             1, 'device->has_a64_buffer_access'),
     Extension('VK_EXT_calibrated_timestamps',             1, True),
     Extension('VK_EXT_conditional_rendering',             1, 'device->info.gen >= 8 || device->info.is_haswell'),
     Extension('VK_EXT_buffer_device_address',             1, 'device->has_a64_buffer_access'),
     Extension('VK_EXT_calibrated_timestamps',             1, True),
     Extension('VK_EXT_conditional_rendering',             1, 'device->info.gen >= 8 || device->info.is_haswell'),
+    Extension('VK_EXT_custom_border_color',               12, 'device->info.gen >= 8'),
     Extension('VK_EXT_debug_report',                      8, True),
     Extension('VK_EXT_depth_clip_enable',                 1, True),
     Extension('VK_EXT_descriptor_indexing',               2,
     Extension('VK_EXT_debug_report',                      8, True),
     Extension('VK_EXT_depth_clip_enable',                 1, True),
     Extension('VK_EXT_descriptor_indexing',               2,
index e2914a2800fd06cf54854d6d03e391c39ed239ca..1900cc178345f2623c2bf8ee5831c6a7ae323a10 100644 (file)
@@ -1318,6 +1318,8 @@ struct anv_device {
     struct anv_state_pool                       binding_table_pool;
     struct anv_state_pool                       surface_state_pool;
 
     struct anv_state_pool                       binding_table_pool;
     struct anv_state_pool                       surface_state_pool;
 
+    struct anv_state_reserved_pool              custom_border_colors;
+
     /** BO used for various workarounds
      *
      * There are a number of workarounds on our hardware which require writing
     /** BO used for various workarounds
      *
      * There are a number of workarounds on our hardware which require writing
@@ -4077,6 +4079,34 @@ anv_clear_color_from_att_state(union isl_color_value *clear_color,
 }
 
 
 }
 
 
+/* Haswell border color is a bit of a disaster.  Float and unorm formats use a
+ * straightforward 32-bit float color in the first 64 bytes.  Instead of using
+ * a nice float/integer union like Gen8+, Haswell specifies the integer border
+ * color as a separate entry /after/ the float color.  The layout of this entry
+ * also depends on the format's bpp (with extra hacks for RG32), and overlaps.
+ *
+ * Since we don't know the format/bpp, we can't make any of the border colors
+ * containing '1' work for all formats, as it would be in the wrong place for
+ * some of them.  We opt to make 32-bit integers work as this seems like the
+ * most common option.  Fortunately, transparent black works regardless, as
+ * all zeroes is the same in every bit-size.
+ */
+struct hsw_border_color {
+   float float32[4];
+   uint32_t _pad0[12];
+   uint32_t uint32[4];
+   uint32_t _pad1[108];
+};
+
+struct gen8_border_color {
+   union {
+      float float32[4];
+      uint32_t uint32[4];
+   };
+   /* Pad out to 64 bytes */
+   uint32_t _pad[12];
+};
+
 struct anv_ycbcr_conversion {
    struct vk_object_base base;
 
 struct anv_ycbcr_conversion {
    struct vk_object_base base;
 
@@ -4100,6 +4130,8 @@ struct anv_sampler {
     * and with a 32-byte stride for use as bindless samplers.
     */
    struct anv_state             bindless_state;
     * and with a 32-byte stride for use as bindless samplers.
     */
    struct anv_state             bindless_state;
+
+   struct anv_state             custom_border_color;
 };
 
 struct anv_framebuffer {
 };
 
 struct anv_framebuffer {
index ea953b7842b1f5005a94083a08bddf82ec125ae6..c9735df68bfd361b69c265931f5408afc235d81d 100644 (file)
@@ -367,9 +367,18 @@ VkResult genX(CreateSampler)(
    sampler->n_planes = 1;
 
    uint32_t border_color_stride = GEN_IS_HASWELL ? 512 : 64;
    sampler->n_planes = 1;
 
    uint32_t border_color_stride = GEN_IS_HASWELL ? 512 : 64;
-   uint32_t border_color_offset = device->border_colors.offset +
-                                  pCreateInfo->borderColor *
-                                  border_color_stride;
+   uint32_t border_color_offset;
+   ASSERTED bool has_custom_color = false;
+   if (pCreateInfo->borderColor <= VK_BORDER_COLOR_INT_OPAQUE_WHITE) {
+      border_color_offset = device->border_colors.offset +
+                            pCreateInfo->borderColor *
+                            border_color_stride;
+   } else {
+      assert(GEN_GEN >= 8);
+      sampler->custom_border_color =
+         anv_state_reserved_pool_alloc(&device->custom_border_colors);
+      border_color_offset = sampler->custom_border_color.offset;
+   }
 
 #if GEN_GEN >= 9
    unsigned sampler_reduction_mode = STD_FILTER;
 
 #if GEN_GEN >= 9
    unsigned sampler_reduction_mode = STD_FILTER;
@@ -406,12 +415,37 @@ VkResult genX(CreateSampler)(
          break;
       }
 #endif
          break;
       }
 #endif
+      case VK_STRUCTURE_TYPE_SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT: {
+         VkSamplerCustomBorderColorCreateInfoEXT *custom_border_color =
+            (VkSamplerCustomBorderColorCreateInfoEXT *) ext;
+         if (sampler->custom_border_color.map == NULL)
+            break;
+         struct gen8_border_color *cbc = sampler->custom_border_color.map;
+         if (custom_border_color->format == VK_FORMAT_B4G4R4A4_UNORM_PACK16) {
+            /* B4G4R4A4_UNORM_PACK16 is treated as R4G4B4A4_UNORM_PACK16 with
+             * a swizzle, but this does not carry over to the sampler for
+             * border colors, so we need to do the swizzle ourselves here.
+             */
+            cbc->uint32[0] = custom_border_color->customBorderColor.uint32[2];
+            cbc->uint32[1] = custom_border_color->customBorderColor.uint32[1];
+            cbc->uint32[2] = custom_border_color->customBorderColor.uint32[0];
+            cbc->uint32[3] = custom_border_color->customBorderColor.uint32[3];
+         } else {
+            /* Both structs share the same layout, so just copy them over. */
+            memcpy(cbc, &custom_border_color->customBorderColor,
+                   sizeof(VkClearColorValue));
+         }
+         has_custom_color = true;
+         break;
+      }
       default:
          anv_debug_ignored_stype(ext->sType);
          break;
       }
    }
 
       default:
          anv_debug_ignored_stype(ext->sType);
          break;
       }
    }
 
+   assert((sampler->custom_border_color.map == NULL) || has_custom_color);
+
    if (device->physical->has_bindless_samplers) {
       /* If we have bindless, allocate enough samplers.  We allocate 32 bytes
        * for each sampler instead of 16 bytes because we want all bindless
    if (device->physical->has_bindless_samplers) {
       /* If we have bindless, allocate enough samplers.  We allocate 32 bytes
        * for each sampler instead of 16 bytes because we want all bindless