{
#ifdef ENABLE_SHADER_CACHE
char renderer[10];
- MAYBE_UNUSED int len = snprintf(renderer, sizeof(renderer), "anv_%04x",
+ ASSERTED int len = snprintf(renderer, sizeof(renderer), "anv_%04x",
device->chipset_id);
assert(len == sizeof(renderer) - 2);
break;
}
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADER_INTERLOCK_FEATURES_EXT: {
+ VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT *features =
+ (VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT *)ext;
+ features->fragmentShaderSampleInterlock = pdevice->info.gen >= 9;
+ features->fragmentShaderPixelInterlock = pdevice->info.gen >= 9;
+ features->fragmentShaderShadingRateInterlock = false;
+ break;
+ }
+
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_QUERY_RESET_FEATURES_EXT: {
VkPhysicalDeviceHostQueryResetFeaturesEXT *features =
(VkPhysicalDeviceHostQueryResetFeaturesEXT *)ext;
break;
}
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INDEX_TYPE_UINT8_FEATURES_EXT: {
+ VkPhysicalDeviceIndexTypeUint8FeaturesEXT *features =
+ (VkPhysicalDeviceIndexTypeUint8FeaturesEXT *)ext;
+ features->indexTypeUint8 = true;
+ break;
+ }
+
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_FEATURES_EXT: {
VkPhysicalDeviceInlineUniformBlockFeaturesEXT *features =
(VkPhysicalDeviceInlineUniformBlockFeaturesEXT *)ext;
break;
}
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGELESS_FRAMEBUFFER_FEATURES_KHR: {
+ VkPhysicalDeviceImagelessFramebufferFeaturesKHR *features =
+ (VkPhysicalDeviceImagelessFramebufferFeaturesKHR *)ext;
+ features->imagelessFramebuffer = true;
+ break;
+ }
+
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_FEATURES: {
VkPhysicalDeviceProtectedMemoryFeatures *features = (void *)ext;
features->protectedMemory = false;
break;
}
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT: {
+ VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT *features = (void *)ext;
+ features->shaderDemoteToHelperInvocation = true;
+ break;
+ }
+
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DRAW_PARAMETERS_FEATURES: {
VkPhysicalDeviceShaderDrawParametersFeatures *features = (void *)ext;
features->shaderDrawParameters = true;
break;
}
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_FEATURES_EXT: {
+ VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *features =
+ (VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *)ext;
+ features->texelBufferAlignment = true;
+ break;
+ }
+
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTERS_FEATURES: {
VkPhysicalDeviceVariablePointersFeatures *features = (void *)ext;
features->variablePointersStorageBuffer = true;
.maxFragmentOutputAttachments = 8,
.maxFragmentDualSrcAttachments = 1,
.maxFragmentCombinedOutputResources = 8,
- .maxComputeSharedMemorySize = 32768,
+ .maxComputeSharedMemorySize = 64 * 1024,
.maxComputeWorkGroupCount = { 65535, 65535, 65535 },
.maxComputeWorkGroupInvocations = 32 * devinfo->max_cs_threads,
.maxComputeWorkGroupSize = {
.viewportBoundsRange = { INT16_MIN, INT16_MAX },
.viewportSubPixelBits = 13, /* We take a float? */
.minMemoryMapAlignment = 4096, /* A page */
- .minTexelBufferOffsetAlignment = 1,
+ /* The dataport requires texel alignment so we need to assume a worst
+ * case of R32G32B32A32 which is 16 bytes.
+ */
+ .minTexelBufferOffsetAlignment = 16,
/* We need 16 for UBO block reads to work and 32 for push UBOs */
.minUniformBufferOffsetAlignment = 32,
.minStorageBufferOffsetAlignment = 4,
.sampledImageStencilSampleCounts = sample_counts,
.storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
.maxSampleMaskWords = 1,
- .timestampComputeAndGraphics = false,
+ .timestampComputeAndGraphics = true,
.timestampPeriod = 1000000000.0 / devinfo->timestamp_frequency,
.maxClipDistances = 8,
.maxCullDistances = 8,
(VkPhysicalDeviceDriverPropertiesKHR *) ext;
driver_props->driverID = VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA_KHR;
- util_snprintf(driver_props->driverName, VK_MAX_DRIVER_NAME_SIZE_KHR,
- "Intel open-source Mesa driver");
+ snprintf(driver_props->driverName, VK_MAX_DRIVER_NAME_SIZE_KHR,
+ "Intel open-source Mesa driver");
- util_snprintf(driver_props->driverInfo, VK_MAX_DRIVER_INFO_SIZE_KHR,
- "Mesa " PACKAGE_VERSION MESA_GIT_SHA1);
+ snprintf(driver_props->driverInfo, VK_MAX_DRIVER_INFO_SIZE_KHR,
+ "Mesa " PACKAGE_VERSION MESA_GIT_SHA1);
driver_props->conformanceVersion = (VkConformanceVersionKHR) {
.major = 1,
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_POINT_CLIPPING_PROPERTIES: {
VkPhysicalDevicePointClippingProperties *properties =
(VkPhysicalDevicePointClippingProperties *) ext;
- properties->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES;
- anv_finishme("Implement pop-free point clipping");
+ properties->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_USER_CLIP_PLANES_ONLY;
break;
}
properties->supportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT |
VK_SUBGROUP_FEATURE_VOTE_BIT |
- VK_SUBGROUP_FEATURE_ARITHMETIC_BIT |
VK_SUBGROUP_FEATURE_BALLOT_BIT |
VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT |
- VK_SUBGROUP_FEATURE_CLUSTERED_BIT |
VK_SUBGROUP_FEATURE_QUAD_BIT;
- properties->quadOperationsInAllStages = true;
+ if (pdevice->info.gen >= 8) {
+ /* TODO: There's no technical reason why these can't be made to
+ * work on gen7 but they don't at the moment so it's best to leave
+ * the feature disabled than enabled and broken.
+ */
+ properties->supportedOperations |=
+ VK_SUBGROUP_FEATURE_ARITHMETIC_BIT |
+ VK_SUBGROUP_FEATURE_CLUSTERED_BIT;
+ }
+ properties->quadOperationsInAllStages = pdevice->info.gen >= 8;
+ break;
+ }
+
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES_EXT: {
+ VkPhysicalDeviceSubgroupSizeControlPropertiesEXT *props =
+ (VkPhysicalDeviceSubgroupSizeControlPropertiesEXT *)ext;
+ STATIC_ASSERT(8 <= BRW_SUBGROUP_SIZE && BRW_SUBGROUP_SIZE <= 32);
+ props->minSubgroupSize = 8;
+ props->maxSubgroupSize = 32;
+ props->maxComputeWorkgroupSubgroups = pdevice->info.max_cs_threads;
+ props->requiredSubgroupSizeStages = VK_SHADER_STAGE_COMPUTE_BIT;
+ break;
+ }
+
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_PROPERTIES_EXT: {
+ VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT *props =
+ (VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT *)ext;
+
+ /* From the SKL PRM Vol. 2d, docs for RENDER_SURFACE_STATE::Surface
+ * Base Address:
+ *
+ * "For SURFTYPE_BUFFER non-rendertarget surfaces, this field
+ * specifies the base address of the first element of the surface,
+ * computed in software by adding the surface base address to the
+ * byte offset of the element in the buffer. The base address must
+ * be aligned to element size."
+ *
+ * The typed dataport messages require that things be texel aligned.
+ * Otherwise, we may just load/store the wrong data or, in the worst
+ * case, there may be hangs.
+ */
+ props->storageTexelBufferOffsetAlignmentBytes = 16;
+ props->storageTexelBufferOffsetSingleTexelAlignment = true;
+
+ /* The sampler, however, is much more forgiving and it can handle
+ * arbitrary byte alignment for linear and buffer surfaces. It's
+ * hard to find a good PRM citation for this but years of empirical
+ * experience demonstrate that this is true.
+ */
+ props->uniformTexelBufferOffsetAlignmentBytes = 1;
+ props->uniformTexelBufferOffsetSingleTexelAlignment = false;
break;
}
return state;
}
+/* Haswell border color is a bit of a disaster. Float and unorm formats use a
+ * straightforward 32-bit float color in the first 64 bytes. Instead of using
+ * a nice float/integer union like Gen8+, Haswell specifies the integer border
+ * color as a separate entry /after/ the float color. The layout of this entry
+ * also depends on the format's bpp (with extra hacks for RG32), and overlaps.
+ *
+ * Since we don't know the format/bpp, we can't make any of the border colors
+ * containing '1' work for all formats, as it would be in the wrong place for
+ * some of them. We opt to make 32-bit integers work as this seems like the
+ * most common option. Fortunately, transparent black works regardless, as
+ * all zeroes is the same in every bit-size.
+ */
+struct hsw_border_color {
+ float float32[4];
+ uint32_t _pad0[12];
+ uint32_t uint32[4];
+ uint32_t _pad1[108];
+};
+
struct gen8_border_color {
union {
float float32[4];
static void
anv_device_init_border_colors(struct anv_device *device)
{
- static const struct gen8_border_color border_colors[] = {
- [VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK] = { .float32 = { 0.0, 0.0, 0.0, 0.0 } },
- [VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK] = { .float32 = { 0.0, 0.0, 0.0, 1.0 } },
- [VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE] = { .float32 = { 1.0, 1.0, 1.0, 1.0 } },
- [VK_BORDER_COLOR_INT_TRANSPARENT_BLACK] = { .uint32 = { 0, 0, 0, 0 } },
- [VK_BORDER_COLOR_INT_OPAQUE_BLACK] = { .uint32 = { 0, 0, 0, 1 } },
- [VK_BORDER_COLOR_INT_OPAQUE_WHITE] = { .uint32 = { 1, 1, 1, 1 } },
- };
+ if (device->info.is_haswell) {
+ static const struct hsw_border_color border_colors[] = {
+ [VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK] = { .float32 = { 0.0, 0.0, 0.0, 0.0 } },
+ [VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK] = { .float32 = { 0.0, 0.0, 0.0, 1.0 } },
+ [VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE] = { .float32 = { 1.0, 1.0, 1.0, 1.0 } },
+ [VK_BORDER_COLOR_INT_TRANSPARENT_BLACK] = { .uint32 = { 0, 0, 0, 0 } },
+ [VK_BORDER_COLOR_INT_OPAQUE_BLACK] = { .uint32 = { 0, 0, 0, 1 } },
+ [VK_BORDER_COLOR_INT_OPAQUE_WHITE] = { .uint32 = { 1, 1, 1, 1 } },
+ };
+
+ device->border_colors =
+ anv_state_pool_emit_data(&device->dynamic_state_pool,
+ sizeof(border_colors), 512, border_colors);
+ } else {
+ static const struct gen8_border_color border_colors[] = {
+ [VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK] = { .float32 = { 0.0, 0.0, 0.0, 0.0 } },
+ [VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK] = { .float32 = { 0.0, 0.0, 0.0, 1.0 } },
+ [VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE] = { .float32 = { 1.0, 1.0, 1.0, 1.0 } },
+ [VK_BORDER_COLOR_INT_TRANSPARENT_BLACK] = { .uint32 = { 0, 0, 0, 0 } },
+ [VK_BORDER_COLOR_INT_OPAQUE_BLACK] = { .uint32 = { 0, 0, 0, 1 } },
+ [VK_BORDER_COLOR_INT_OPAQUE_WHITE] = { .uint32 = { 1, 1, 1, 1 } },
+ };
- device->border_colors = anv_state_pool_emit_data(&device->dynamic_state_pool,
- sizeof(border_colors), 64,
- border_colors);
+ device->border_colors =
+ anv_state_pool_emit_data(&device->dynamic_state_pool,
+ sizeof(border_colors), 64, border_colors);
+ }
}
static void
if (!device)
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
- const unsigned decode_flags =
- GEN_BATCH_DECODE_FULL |
- ((INTEL_DEBUG & DEBUG_COLOR) ? GEN_BATCH_DECODE_IN_COLOR : 0) |
- GEN_BATCH_DECODE_OFFSETS |
- GEN_BATCH_DECODE_FLOATS;
+ if (INTEL_DEBUG & DEBUG_BATCH) {
+ const unsigned decode_flags =
+ GEN_BATCH_DECODE_FULL |
+ ((INTEL_DEBUG & DEBUG_COLOR) ? GEN_BATCH_DECODE_IN_COLOR : 0) |
+ GEN_BATCH_DECODE_OFFSETS |
+ GEN_BATCH_DECODE_FLOATS;
- gen_batch_decode_ctx_init(&device->decoder_ctx,
- &physical_device->info,
- stderr, decode_flags, NULL,
- decode_get_bo, NULL, device);
+ gen_batch_decode_ctx_init(&device->decoder_ctx,
+ &physical_device->info,
+ stderr, decode_flags, NULL,
+ decode_get_bo, NULL, device);
+ }
device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
device->instance = physical_device->instance;
goto fail_surface_state_pool;
}
- result = anv_bo_init_new(&device->workaround_bo, device, 1024);
+ result = anv_bo_init_new(&device->workaround_bo, device, 4096);
if (result != VK_SUCCESS)
goto fail_binding_table_pool;
anv_gem_destroy_context(device, device->context_id);
- gen_batch_decode_ctx_finish(&device->decoder_ctx);
+ if (INTEL_DEBUG & DEBUG_BATCH)
+ gen_batch_decode_ctx_finish(&device->decoder_ctx);
close(device->fd);
util_vma_heap_free(&device->vma_lo, addr_48b, bo->size);
device->vma_lo_available += bo->size;
} else {
- MAYBE_UNUSED const struct anv_physical_device *physical_device =
+ ASSERTED const struct anv_physical_device *physical_device =
&device->instance->physicalDevice;
assert(addr_48b >= physical_device->memory.heaps[0].vma_start &&
addr_48b < (physical_device->memory.heaps[0].vma_start +
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
- size_t size = sizeof(*framebuffer) +
- sizeof(struct anv_image_view *) * pCreateInfo->attachmentCount;
- framebuffer = vk_alloc2(&device->alloc, pAllocator, size, 8,
- VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
- if (framebuffer == NULL)
- return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+ size_t size = sizeof(*framebuffer);
+
+ /* VK_KHR_imageless_framebuffer extension says:
+ *
+ * If flags includes VK_FRAMEBUFFER_CREATE_IMAGELESS_BIT_KHR,
+ * parameter pAttachments is ignored.
+ */
+ if (!(pCreateInfo->flags & VK_FRAMEBUFFER_CREATE_IMAGELESS_BIT_KHR)) {
+ size += sizeof(struct anv_image_view *) * pCreateInfo->attachmentCount;
+ framebuffer = vk_alloc2(&device->alloc, pAllocator, size, 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (framebuffer == NULL)
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
+ ANV_FROM_HANDLE(anv_image_view, iview, pCreateInfo->pAttachments[i]);
+ framebuffer->attachments[i] = iview;
+ }
+ framebuffer->attachment_count = pCreateInfo->attachmentCount;
+ } else {
+ assert(device->enabled_extensions.KHR_imageless_framebuffer);
+ framebuffer = vk_alloc2(&device->alloc, pAllocator, size, 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (framebuffer == NULL)
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
- framebuffer->attachment_count = pCreateInfo->attachmentCount;
- for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
- VkImageView _iview = pCreateInfo->pAttachments[i];
- framebuffer->attachments[i] = anv_image_view_from_handle(_iview);
+ framebuffer->attachment_count = 0;
}
framebuffer->width = pCreateInfo->width;