#include "util/build_id.h"
#include "util/disk_cache.h"
#include "util/mesa-sha1.h"
+#include "util/os_file.h"
+#include "util/u_atomic.h"
#include "util/u_string.h"
#include "git_sha1.h"
#include "vk_util.h"
_mesa_sha1_update(&sha1_ctx, build_id_data(note), build_id_len);
_mesa_sha1_update(&sha1_ctx, &device->chipset_id,
sizeof(device->chipset_id));
+ _mesa_sha1_update(&sha1_ctx, &device->always_use_bindless,
+ sizeof(device->always_use_bindless));
+ _mesa_sha1_update(&sha1_ctx, &device->has_a64_buffer_access,
+ sizeof(device->has_a64_buffer_access));
+ _mesa_sha1_update(&sha1_ctx, &device->has_bindless_images,
+ sizeof(device->has_bindless_images));
+ _mesa_sha1_update(&sha1_ctx, &device->has_bindless_samplers,
+ sizeof(device->has_bindless_samplers));
_mesa_sha1_final(&sha1_ctx, sha1);
memcpy(device->pipeline_cache_uuid, sha1, VK_UUID_SIZE);
#endif
}
+/* Return the amount of system memory the kernel reports as available,
+ * in bytes.
+ *
+ * The value is taken from the "MemAvailable:" line of /proc/meminfo, which
+ * the kernel prints as a decimal number of kB.  Returns 0 if the file cannot
+ * be read, the line is missing, or the value cannot be parsed.
+ */
+static uint64_t
+get_available_system_memory(void)
+{
+   char *meminfo = os_read_file("/proc/meminfo");
+   if (!meminfo)
+      return 0;
+
+   uint64_t bytes_available = 0;
+   uint64_t kb_mem_available;
+   const char *str = strstr(meminfo, "MemAvailable:");
+
+   /* The field is decimal: scanning it as hexadecimal would silently
+    * misreport any value, so use the unsigned decimal scan conversion.
+    */
+   if (str && sscanf(str, "MemAvailable: %" SCNu64, &kb_mem_available) == 1)
+      bytes_available = kb_mem_available << 10; /* kB -> bytes */
+
+   /* The buffer returned by os_read_file() is ours to release. */
+   free(meminfo);
+   return bytes_available;
+}
+
+
static VkResult
anv_physical_device_init(struct anv_physical_device *device,
struct anv_instance *instance,
intel_logw("Ivy Bridge Vulkan support is incomplete");
} else if (device->info.gen == 7 && device->info.is_baytrail) {
intel_logw("Bay Trail Vulkan support is incomplete");
- } else if (device->info.gen >= 8 && device->info.gen <= 10) {
- /* Gen8-10 fully supported */
- } else if (device->info.gen == 11) {
- intel_logw("Vulkan is not yet fully supported on gen11.");
+ } else if (device->info.gen >= 8 && device->info.gen <= 11) {
+ /* Gen8-11 fully supported */
} else {
result = vk_errorf(device->instance, device,
VK_ERROR_INCOMPATIBLE_DRIVER,
device->has_context_isolation =
anv_gem_get_param(fd, I915_PARAM_HAS_CONTEXT_ISOLATION);
+ device->always_use_bindless =
+ env_var_as_boolean("ANV_ALWAYS_BINDLESS", false);
+
+ /* We first got the A64 messages on broadwell and we can only use them if
+ * we can pass addresses directly into the shader which requires softpin.
+ */
+ device->has_a64_buffer_access = device->info.gen >= 8 &&
+ device->use_softpin;
+
+ /* We first get bindless image access on Skylake and we can only really do
+ * it if we don't have any relocations so we need softpin.
+ */
+ device->has_bindless_images = device->info.gen >= 9 &&
+ device->use_softpin;
+
+ /* We've had bindless samplers since Ivy Bridge (forever in Vulkan terms)
+ * because it's just a matter of setting the sampler address in the sample
+ * message header. However, we've not bothered to wire it up for vec4 so
+ * we leave it disabled on gen7.
+ */
+ device->has_bindless_samplers = device->info.gen >= 8;
+
+ device->has_mem_available = get_available_system_memory() != 0;
+
/* Starting with Gen10, the timestamp frequency of the command streamer may
* vary from one part to another. We can query the value from the kernel.
*/
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES_EXT: {
VkPhysicalDeviceBufferDeviceAddressFeaturesEXT *features = (void *)ext;
- features->bufferDeviceAddress = pdevice->use_softpin &&
- pdevice->info.gen >= 8;
+ features->bufferDeviceAddress = pdevice->has_a64_buffer_access;
features->bufferDeviceAddressCaptureReplay = false;
features->bufferDeviceAddressMultiDevice = false;
break;
break;
}
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADER_INTERLOCK_FEATURES_EXT: {
+ VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT *features =
+ (VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT *)ext;
+ features->fragmentShaderSampleInterlock = pdevice->info.gen >= 9;
+ features->fragmentShaderPixelInterlock = pdevice->info.gen >= 9;
+ features->fragmentShaderShadingRateInterlock = false;
+ break;
+ }
+
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_QUERY_RESET_FEATURES_EXT: {
VkPhysicalDeviceHostQueryResetFeaturesEXT *features =
(VkPhysicalDeviceHostQueryResetFeaturesEXT *)ext;
break;
}
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES_EXT: {
+ VkPhysicalDeviceDescriptorIndexingFeaturesEXT *features =
+ (VkPhysicalDeviceDescriptorIndexingFeaturesEXT *)ext;
+ features->shaderInputAttachmentArrayDynamicIndexing = false;
+ features->shaderUniformTexelBufferArrayDynamicIndexing = true;
+ features->shaderStorageTexelBufferArrayDynamicIndexing = true;
+ features->shaderUniformBufferArrayNonUniformIndexing = false;
+ features->shaderSampledImageArrayNonUniformIndexing = true;
+ features->shaderStorageBufferArrayNonUniformIndexing = true;
+ features->shaderStorageImageArrayNonUniformIndexing = true;
+ features->shaderInputAttachmentArrayNonUniformIndexing = false;
+ features->shaderUniformTexelBufferArrayNonUniformIndexing = true;
+ features->shaderStorageTexelBufferArrayNonUniformIndexing = true;
+ features->descriptorBindingUniformBufferUpdateAfterBind = false;
+ features->descriptorBindingSampledImageUpdateAfterBind = true;
+ features->descriptorBindingStorageImageUpdateAfterBind = true;
+ features->descriptorBindingStorageBufferUpdateAfterBind = true;
+ features->descriptorBindingUniformTexelBufferUpdateAfterBind = true;
+ features->descriptorBindingStorageTexelBufferUpdateAfterBind = true;
+ features->descriptorBindingUpdateUnusedWhilePending = true;
+ features->descriptorBindingPartiallyBound = true;
+ features->descriptorBindingVariableDescriptorCount = false;
+ features->runtimeDescriptorArray = true;
+ break;
+ }
+
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_FEATURES_EXT: {
VkPhysicalDeviceInlineUniformBlockFeaturesEXT *features =
(VkPhysicalDeviceInlineUniformBlockFeaturesEXT *)ext;
features->inlineUniformBlock = true;
- features->descriptorBindingInlineUniformBlockUpdateAfterBind = false;
+ features->descriptorBindingInlineUniformBlockUpdateAfterBind = true;
break;
}
break;
}
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_INT64_FEATURES_KHR: {
+ VkPhysicalDeviceShaderAtomicInt64FeaturesKHR *features = (void *)ext;
+ features->shaderBufferInt64Atomics =
+ pdevice->info.gen >= 9 && pdevice->use_softpin;
+ features->shaderSharedInt64Atomics = VK_FALSE;
+ break;
+ }
+
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT: {
+ VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT *features = (void *)ext;
+ features->shaderDemoteToHelperInvocation = true;
+ break;
+ }
+
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DRAW_PARAMETERS_FEATURES: {
VkPhysicalDeviceShaderDrawParametersFeatures *features = (void *)ext;
features->shaderDrawParameters = true;
break;
}
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_FEATURES_EXT: {
+ VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *features =
+ (VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *)ext;
+ features->texelBufferAlignment = true;
+ break;
+ }
+
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTERS_FEATURES: {
VkPhysicalDeviceVariablePointersFeatures *features = (void *)ext;
features->variablePointersStorageBuffer = true;
break;
}
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_UNIFORM_BUFFER_STANDARD_LAYOUT_FEATURES_KHR: {
+ VkPhysicalDeviceUniformBufferStandardLayoutFeaturesKHR *features =
+ (VkPhysicalDeviceUniformBufferStandardLayoutFeaturesKHR *)ext;
+ features->uniformBufferStandardLayout = true;
+ break;
+ }
+
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT: {
VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *features =
(VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *)ext;
}
}
+#define MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS 64
+
+#define MAX_PER_STAGE_DESCRIPTOR_INPUT_ATTACHMENTS 64
+#define MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS 256
+
void anv_GetPhysicalDeviceProperties(
VkPhysicalDevice physicalDevice,
VkPhysicalDeviceProperties* pProperties)
const uint32_t max_raw_buffer_sz = devinfo->gen >= 7 ?
(1ul << 30) : (1ul << 27);
- const uint32_t max_samplers = (devinfo->gen >= 8 || devinfo->is_haswell) ?
- 128 : 16;
+ const uint32_t max_ssbos = pdevice->has_a64_buffer_access ? UINT16_MAX : 64;
+ const uint32_t max_textures =
+ pdevice->has_bindless_images ? UINT16_MAX : 128;
+ const uint32_t max_samplers =
+ pdevice->has_bindless_samplers ? UINT16_MAX :
+ (devinfo->gen >= 8 || devinfo->is_haswell) ? 128 : 16;
+ const uint32_t max_images =
+ pdevice->has_bindless_images ? UINT16_MAX : MAX_IMAGES;
+
+ /* The moment we have anything bindless, claim a high per-stage limit */
+ const uint32_t max_per_stage =
+ pdevice->has_a64_buffer_access ? UINT32_MAX :
+ MAX_BINDING_TABLE_SIZE - MAX_RTS;
VkSampleCountFlags sample_counts =
isl_device_get_sample_counts(&pdevice->isl_dev);
.sparseAddressSpaceSize = 0,
.maxBoundDescriptorSets = MAX_SETS,
.maxPerStageDescriptorSamplers = max_samplers,
- .maxPerStageDescriptorUniformBuffers = 64,
- .maxPerStageDescriptorStorageBuffers = 64,
- .maxPerStageDescriptorSampledImages = max_samplers,
- .maxPerStageDescriptorStorageImages = MAX_IMAGES,
- .maxPerStageDescriptorInputAttachments = 64,
- .maxPerStageResources = MAX_BINDING_TABLE_SIZE - MAX_RTS,
+ .maxPerStageDescriptorUniformBuffers = MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS,
+ .maxPerStageDescriptorStorageBuffers = max_ssbos,
+ .maxPerStageDescriptorSampledImages = max_textures,
+ .maxPerStageDescriptorStorageImages = max_images,
+ .maxPerStageDescriptorInputAttachments = MAX_PER_STAGE_DESCRIPTOR_INPUT_ATTACHMENTS,
+ .maxPerStageResources = max_per_stage,
.maxDescriptorSetSamplers = 6 * max_samplers, /* number of stages * maxPerStageDescriptorSamplers */
- .maxDescriptorSetUniformBuffers = 6 * 64, /* number of stages * maxPerStageDescriptorUniformBuffers */
+ .maxDescriptorSetUniformBuffers = 6 * MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS, /* number of stages * maxPerStageDescriptorUniformBuffers */
.maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2,
- .maxDescriptorSetStorageBuffers = 6 * 64, /* number of stages * maxPerStageDescriptorStorageBuffers */
+ .maxDescriptorSetStorageBuffers = 6 * max_ssbos, /* number of stages * maxPerStageDescriptorStorageBuffers */
.maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2,
- .maxDescriptorSetSampledImages = 6 * max_samplers, /* number of stages * maxPerStageDescriptorSampledImages */
- .maxDescriptorSetStorageImages = 6 * MAX_IMAGES, /* number of stages * maxPerStageDescriptorStorageImages */
- .maxDescriptorSetInputAttachments = 256,
+ .maxDescriptorSetSampledImages = 6 * max_textures, /* number of stages * maxPerStageDescriptorSampledImages */
+ .maxDescriptorSetStorageImages = 6 * max_images, /* number of stages * maxPerStageDescriptorStorageImages */
+ .maxDescriptorSetInputAttachments = MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS,
.maxVertexInputAttributes = MAX_VBS,
.maxVertexInputBindings = MAX_VBS,
.maxVertexInputAttributeOffset = 2047,
.maxGeometryOutputComponents = 128,
.maxGeometryOutputVertices = 256,
.maxGeometryTotalOutputComponents = 1024,
- .maxFragmentInputComponents = 112, /* 128 components - (POS, PSIZ, CLIP_DIST0, CLIP_DIST1) */
+ .maxFragmentInputComponents = 116, /* 128 components - (PSIZ, CLIP_DIST0, CLIP_DIST1) */
.maxFragmentOutputAttachments = 8,
.maxFragmentDualSrcAttachments = 1,
.maxFragmentCombinedOutputResources = 8,
- .maxComputeSharedMemorySize = 32768,
+ .maxComputeSharedMemorySize = 64 * 1024,
.maxComputeWorkGroupCount = { 65535, 65535, 65535 },
.maxComputeWorkGroupInvocations = 32 * devinfo->max_cs_threads,
.maxComputeWorkGroupSize = {
.viewportBoundsRange = { INT16_MIN, INT16_MAX },
.viewportSubPixelBits = 13, /* We take a float? */
.minMemoryMapAlignment = 4096, /* A page */
- .minTexelBufferOffsetAlignment = 1,
+ /* The dataport requires texel alignment so we need to assume a worst
+ * case of R32G32B32A32 which is 16 bytes.
+ */
+ .minTexelBufferOffsetAlignment = 16,
/* We need 16 for UBO block reads to work and 32 for push UBOs */
.minUniformBufferOffsetAlignment = 32,
.minStorageBufferOffsetAlignment = 4,
.sampledImageStencilSampleCounts = sample_counts,
.storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
.maxSampleMaskWords = 1,
- .timestampComputeAndGraphics = false,
+ .timestampComputeAndGraphics = true,
.timestampPeriod = 1000000000.0 / devinfo->timestamp_frequency,
.maxClipDistances = 8,
.maxCullDistances = 8,
break;
}
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_PROPERTIES_EXT: {
+ VkPhysicalDeviceDescriptorIndexingPropertiesEXT *props =
+ (VkPhysicalDeviceDescriptorIndexingPropertiesEXT *)ext;
+
+ /* It's a bit hard to exactly map our implementation to the limits
+ * described here. The bindless surface handle in the extended
+ * message descriptors is 20 bits and it's an index into the table of
+ * RENDER_SURFACE_STATE structs that starts at bindless surface base
+ * address. Given that most things consume two surface states per
+ * view (general/sampled for textures and write-only/read-write for
+ * images), we claim 2^19 things.
+ *
+ * For SSBOs, we just use A64 messages so there is no real limit
+ * there beyond the limit on the total size of a descriptor set.
+ */
+ const unsigned max_bindless_views = 1 << 19;
+
+ props->maxUpdateAfterBindDescriptorsInAllPools = max_bindless_views;
+ props->shaderUniformBufferArrayNonUniformIndexingNative = false;
+ props->shaderSampledImageArrayNonUniformIndexingNative = false;
+ props->shaderStorageBufferArrayNonUniformIndexingNative = true;
+ props->shaderStorageImageArrayNonUniformIndexingNative = false;
+ props->shaderInputAttachmentArrayNonUniformIndexingNative = false;
+ props->robustBufferAccessUpdateAfterBind = true;
+ props->quadDivergentImplicitLod = false;
+ props->maxPerStageDescriptorUpdateAfterBindSamplers = max_bindless_views;
+ props->maxPerStageDescriptorUpdateAfterBindUniformBuffers = MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS;
+ props->maxPerStageDescriptorUpdateAfterBindStorageBuffers = UINT32_MAX;
+ props->maxPerStageDescriptorUpdateAfterBindSampledImages = max_bindless_views;
+ props->maxPerStageDescriptorUpdateAfterBindStorageImages = max_bindless_views;
+ props->maxPerStageDescriptorUpdateAfterBindInputAttachments = MAX_PER_STAGE_DESCRIPTOR_INPUT_ATTACHMENTS;
+ props->maxPerStageUpdateAfterBindResources = UINT32_MAX;
+ props->maxDescriptorSetUpdateAfterBindSamplers = max_bindless_views;
+ props->maxDescriptorSetUpdateAfterBindUniformBuffers = 6 * MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS;
+ props->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2;
+ props->maxDescriptorSetUpdateAfterBindStorageBuffers = UINT32_MAX;
+ props->maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2;
+ props->maxDescriptorSetUpdateAfterBindSampledImages = max_bindless_views;
+ props->maxDescriptorSetUpdateAfterBindStorageImages = max_bindless_views;
+ props->maxDescriptorSetUpdateAfterBindInputAttachments = MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS;
+ break;
+ }
+
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES_KHR: {
VkPhysicalDeviceDriverPropertiesKHR *driver_props =
(VkPhysicalDeviceDriverPropertiesKHR *) ext;
driver_props->driverID = VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA_KHR;
- util_snprintf(driver_props->driverName, VK_MAX_DRIVER_NAME_SIZE_KHR,
- "Intel open-source Mesa driver");
+ snprintf(driver_props->driverName, VK_MAX_DRIVER_NAME_SIZE_KHR,
+ "Intel open-source Mesa driver");
- util_snprintf(driver_props->driverInfo, VK_MAX_DRIVER_INFO_SIZE_KHR,
- "Mesa " PACKAGE_VERSION MESA_GIT_SHA1);
+ snprintf(driver_props->driverInfo, VK_MAX_DRIVER_INFO_SIZE_KHR,
+ "Mesa " PACKAGE_VERSION MESA_GIT_SHA1);
driver_props->conformanceVersion = (VkConformanceVersionKHR) {
.major = 1,
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_POINT_CLIPPING_PROPERTIES: {
VkPhysicalDevicePointClippingProperties *properties =
(VkPhysicalDevicePointClippingProperties *) ext;
- properties->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES;
- anv_finishme("Implement pop-free point clipping");
+ properties->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_USER_CLIP_PLANES_ONLY;
break;
}
break;
}
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_PROPERTIES_EXT: {
+ VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT *props =
+ (VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT *)ext;
+
+ /* From the SKL PRM Vol. 2d, docs for RENDER_SURFACE_STATE::Surface
+ * Base Address:
+ *
+ * "For SURFTYPE_BUFFER non-rendertarget surfaces, this field
+ * specifies the base address of the first element of the surface,
+ * computed in software by adding the surface base address to the
+ * byte offset of the element in the buffer. The base address must
+ * be aligned to element size."
+ *
+ * The typed dataport messages require that things be texel aligned.
+ * Otherwise, we may just load/store the wrong data or, in the worst
+ * case, there may be hangs.
+ */
+ props->storageTexelBufferOffsetAlignmentBytes = 16;
+ props->storageTexelBufferOffsetSingleTexelAlignment = true;
+
+ /* The sampler, however, is much more forgiving and it can handle
+ * arbitrary byte alignment for linear and buffer surfaces. It's
+ * hard to find a good PRM citation for this but years of empirical
+ * experience demonstrate that this is true.
+ */
+ props->uniformTexelBufferOffsetAlignmentBytes = 1;
+ props->uniformTexelBufferOffsetSingleTexelAlignment = false;
+ break;
+ }
+
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT: {
VkPhysicalDeviceTransformFeedbackPropertiesEXT *props =
(VkPhysicalDeviceTransformFeedbackPropertiesEXT *)ext;
}
}
+/* Fill in the per-heap usage and budget arrays of a
+ * VkPhysicalDeviceMemoryBudgetPropertiesEXT.
+ *
+ * Each heap is granted a share of the currently available system memory
+ * proportional to its size relative to the sum of all heap sizes.  The budget
+ * is what the heap already uses plus 90% of that share, clamped to the heap
+ * size and rounded down to a whole MB.  Entries past the last heap are
+ * zeroed.
+ */
+static void
+anv_get_memory_budget(VkPhysicalDevice physicalDevice,
+                      VkPhysicalDeviceMemoryBudgetPropertiesEXT *memoryBudget)
+{
+   ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice);
+
+   const uint64_t sys_mem = get_available_system_memory();
+   assert(sys_mem > 0);
+
+   /* Sum of all heap sizes, used as the denominator of each heap's share. */
+   VkDeviceSize all_heaps = 0;
+   for (size_t h = 0; h < device->memory.heap_count; h++)
+      all_heaps += device->memory.heaps[h].size;
+
+   size_t h;
+   for (h = 0; h < device->memory.heap_count; h++) {
+      const VkDeviceSize size = device->memory.heaps[h].size;
+      const VkDeviceSize used = device->memory.heaps[h].used;
+
+      /* This heap's slice of the available system memory. */
+      const double share = (double) size / all_heaps;
+      const VkDeviceSize sys_share = sys_mem * share;
+
+      /* Do not encourage the application to starve the system: only 90% of
+       * the slice is offered on top of what is already in use.
+       */
+      const uint64_t headroom = sys_share * 9 / 10;
+      VkDeviceSize budget = MIN2(size, used + headroom);
+
+      /* Truncate to a multiple of 1 MB. */
+      const VkDeviceSize mb_mask = (1ull << 20) - 1;
+      budget &= ~mb_mask;
+
+      /* For indices below memoryHeapCount the budget has to be non-zero and
+       * may not exceed VkMemoryHeap::size.
+       */
+      assert(0 < budget && budget <= size);
+
+      memoryBudget->heapUsage[h] = used;
+      memoryBudget->heapBudget[h] = budget;
+   }
+
+   /* Indices at or beyond memoryHeapCount must report zero for both the
+    * budget and the usage.
+    */
+   for (; h < VK_MAX_MEMORY_HEAPS; h++) {
+      memoryBudget->heapBudget[h] = 0;
+      memoryBudget->heapUsage[h] = 0;
+   }
+}
+
+
void anv_GetPhysicalDeviceMemoryProperties2(
VkPhysicalDevice physicalDevice,
VkPhysicalDeviceMemoryProperties2* pMemoryProperties)
vk_foreach_struct(ext, pMemoryProperties->pNext) {
switch (ext->sType) {
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT:
+ anv_get_memory_budget(physicalDevice, (void*)ext);
+ break;
default:
anv_debug_ignored_stype(ext->sType);
break;
return state;
}
+/* Haswell border color is a bit of a disaster. Float and unorm formats use a
+ * straightforward 32-bit float color in the first 64 bytes. Instead of using
+ * a nice float/integer union like Gen8+, Haswell specifies the integer border
+ * color as a separate entry /after/ the float color. The layout of this entry
+ * also depends on the format's bpp (with extra hacks for RG32), and overlaps.
+ *
+ * Since we don't know the format/bpp, we can't make any of the border colors
+ * containing '1' work for all formats, as it would be in the wrong place for
+ * some of them. We opt to make 32-bit integers work as this seems like the
+ * most common option. Fortunately, transparent black works regardless, as
+ * all zeroes is the same in every bit-size.
+ *
+ * Layout of one entry as declared below (4-byte float and uint32_t): the
+ * float color sits at byte 0, the 32-bit integer color at byte 64, and the
+ * trailing padding brings the whole entry to 512 bytes.
+ */
+struct hsw_border_color {
+ float float32[4]; /* float/unorm border color, bytes 0-15 */
+ uint32_t _pad0[12]; /* fills the rest of the first 64 bytes */
+ uint32_t uint32[4]; /* 32-bit integer border color, bytes 64-79 */
+ uint32_t _pad1[108]; /* pads the entry out to 512 bytes */
+};
+
struct gen8_border_color {
union {
float float32[4];
static void
anv_device_init_border_colors(struct anv_device *device)
{
- static const struct gen8_border_color border_colors[] = {
- [VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK] = { .float32 = { 0.0, 0.0, 0.0, 0.0 } },
- [VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK] = { .float32 = { 0.0, 0.0, 0.0, 1.0 } },
- [VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE] = { .float32 = { 1.0, 1.0, 1.0, 1.0 } },
- [VK_BORDER_COLOR_INT_TRANSPARENT_BLACK] = { .uint32 = { 0, 0, 0, 0 } },
- [VK_BORDER_COLOR_INT_OPAQUE_BLACK] = { .uint32 = { 0, 0, 0, 1 } },
- [VK_BORDER_COLOR_INT_OPAQUE_WHITE] = { .uint32 = { 1, 1, 1, 1 } },
- };
+ if (device->info.is_haswell) { /* Haswell: table of struct hsw_border_color entries */
+ static const struct hsw_border_color border_colors[] = {
+ [VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK] = { .float32 = { 0.0, 0.0, 0.0, 0.0 } },
+ [VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK] = { .float32 = { 0.0, 0.0, 0.0, 1.0 } },
+ [VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE] = { .float32 = { 1.0, 1.0, 1.0, 1.0 } },
+ [VK_BORDER_COLOR_INT_TRANSPARENT_BLACK] = { .uint32 = { 0, 0, 0, 0 } },
+ [VK_BORDER_COLOR_INT_OPAQUE_BLACK] = { .uint32 = { 0, 0, 0, 1 } },
+ [VK_BORDER_COLOR_INT_OPAQUE_WHITE] = { .uint32 = { 1, 1, 1, 1 } },
+ };
+
+ device->border_colors =
+ anv_state_pool_emit_data(&device->dynamic_state_pool,
+ sizeof(border_colors), 512, border_colors); /* 512 presumably is the alignment argument -- TODO confirm */
+ } else { /* every other platform: table of struct gen8_border_color entries */
+ static const struct gen8_border_color border_colors[] = {
+ [VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK] = { .float32 = { 0.0, 0.0, 0.0, 0.0 } },
+ [VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK] = { .float32 = { 0.0, 0.0, 0.0, 1.0 } },
+ [VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE] = { .float32 = { 1.0, 1.0, 1.0, 1.0 } },
+ [VK_BORDER_COLOR_INT_TRANSPARENT_BLACK] = { .uint32 = { 0, 0, 0, 0 } },
+ [VK_BORDER_COLOR_INT_OPAQUE_BLACK] = { .uint32 = { 0, 0, 0, 1 } },
+ [VK_BORDER_COLOR_INT_OPAQUE_WHITE] = { .uint32 = { 1, 1, 1, 1 } },
+ };
- device->border_colors = anv_state_pool_emit_data(&device->dynamic_state_pool,
- sizeof(border_colors), 64,
- border_colors);
+ device->border_colors =
+ anv_state_pool_emit_data(&device->dynamic_state_pool,
+ sizeof(border_colors), 64, border_colors); /* 64 presumably is the alignment argument -- TODO confirm */
+ }
}
static void
if (!device)
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
- const unsigned decode_flags =
- GEN_BATCH_DECODE_FULL |
- ((INTEL_DEBUG & DEBUG_COLOR) ? GEN_BATCH_DECODE_IN_COLOR : 0) |
- GEN_BATCH_DECODE_OFFSETS |
- GEN_BATCH_DECODE_FLOATS;
+ if (INTEL_DEBUG & DEBUG_BATCH) {
+ const unsigned decode_flags =
+ GEN_BATCH_DECODE_FULL |
+ ((INTEL_DEBUG & DEBUG_COLOR) ? GEN_BATCH_DECODE_IN_COLOR : 0) |
+ GEN_BATCH_DECODE_OFFSETS |
+ GEN_BATCH_DECODE_FLOATS;
- gen_batch_decode_ctx_init(&device->decoder_ctx,
- &physical_device->info,
- stderr, decode_flags, NULL,
- decode_get_bo, NULL, device);
+ gen_batch_decode_ctx_init(&device->decoder_ctx,
+ &physical_device->info,
+ stderr, decode_flags, NULL,
+ decode_get_bo, NULL, device);
+ }
device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
device->instance = physical_device->instance;
goto fail_surface_state_pool;
}
- result = anv_bo_init_new(&device->workaround_bo, device, 1024);
+ result = anv_bo_init_new(&device->workaround_bo, device, 4096);
if (result != VK_SUCCESS)
goto fail_binding_table_pool;
anv_gem_destroy_context(device, device->context_id);
- gen_batch_decode_ctx_finish(&device->decoder_ctx);
+ if (INTEL_DEBUG & DEBUG_BATCH)
+ gen_batch_decode_ctx_finish(&device->decoder_ctx);
close(device->fd);
*pMem = anv_device_memory_to_handle(mem);
+ p_atomic_add(&pdevice->memory.heaps[mem->type->heapIndex].used,
+ mem->bo->size);
+
return VK_SUCCESS;
fail:
{
ANV_FROM_HANDLE(anv_device, device, _device);
ANV_FROM_HANDLE(anv_device_memory, mem, _mem);
+ struct anv_physical_device *pdevice = &device->instance->physicalDevice;
if (mem == NULL)
return;
if (mem->map)
anv_UnmapMemory(_device, _mem);
+ p_atomic_add(&pdevice->memory.heaps[mem->type->heapIndex].used,
+ -mem->bo->size);
+
anv_bo_cache_release(device, &device->bo_cache, mem->bo);
#if defined(ANDROID) && ANDROID_API_LEVEL >= 26
if (!sampler)
return;
+ if (sampler->bindless_state.map) {
+ anv_state_pool_free(&device->dynamic_state_pool,
+ sampler->bindless_state);
+ }
+
vk_free2(&device->alloc, pAllocator, sampler);
}