X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fintel%2Fvulkan%2Fanv_device.c;h=f6b41b79de651a4bc4c738bcffa76f9e903e909c;hb=40a6de176d0f4ffa9fcad8f2c8ab30a7e8cfe807;hp=5599f35304e4d2d408a2aa1db0d3ed36a785f0ba;hpb=e618951322e4bf27991c1a31c5933bd0d0f580a1;p=mesa.git diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index 5599f35304e..f6b41b79de6 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -39,7 +39,7 @@ #include "util/os_file.h" #include "util/u_atomic.h" #include "util/u_string.h" -#include "util/xmlpool.h" +#include "util/driconf.h" #include "git_sha1.h" #include "vk_util.h" #include "common/gen_aux_map.h" @@ -66,6 +66,9 @@ DRI_CONF_END; */ #define MAX_DEBUG_MESSAGE_LENGTH 4096 +/* Render engine timestamp register */ +#define TIMESTAMP 0x2358 + static void compiler_debug_log(void *data, const char *fmt, ...) { @@ -136,7 +139,7 @@ anv_physical_device_init_heaps(struct anv_physical_device *device, int fd) anv_perf_warn(NULL, NULL, "Failed to get I915_CONTEXT_PARAM_GTT_SIZE: %m"); - if (anv_gem_get_aperture(fd, &device->gtt_size) == -1) { + if (gen_get_aperture_size(fd, &device->gtt_size) == -1) { return vk_errorfi(device->instance, NULL, VK_ERROR_INITIALIZATION_FAILED, "failed to get aperture size: %m"); @@ -308,7 +311,7 @@ anv_physical_device_free_disk_cache(struct anv_physical_device *device) static uint64_t get_available_system_memory() { - char *meminfo = os_read_file("/proc/meminfo"); + char *meminfo = os_read_file("/proc/meminfo", NULL); if (!meminfo) return 0; @@ -377,7 +380,7 @@ anv_physical_device_try_create(struct anv_instance *instance, goto fail_fd; } - device->_loader_data.loaderMagic = ICD_LOADER_MAGIC; + vk_object_base_init(NULL, &device->base, VK_OBJECT_TYPE_PHYSICAL_DEVICE); device->instance = instance; assert(strlen(path) < ARRAY_SIZE(device->path)); @@ -451,6 +454,10 @@ anv_physical_device_try_create(struct anv_instance *instance, device->always_use_bindless = env_var_as_boolean("ANV_ALWAYS_BINDLESS", false); + device->use_call_secondary = + device->use_softpin && + !env_var_as_boolean("ANV_DISABLE_SECONDARY_CMD_BUFFER_CALLS", false); + /* We first got the A64 messages on broadwell and we can only use them if * we can pass addresses directly into the shader which requires softpin. */ @@ -470,23 +477,20 @@ anv_physical_device_try_create(struct anv_instance *instance, */ device->has_bindless_samplers = device->info.gen >= 8; + device->has_implicit_ccs = device->info.has_aux_map; + + /* Check if we can read the GPU timestamp register from the CPU */ + uint64_t u64_ignore; + device->has_reg_timestamp = anv_gem_reg_read(fd, TIMESTAMP | I915_REG_READ_8B_WA, + &u64_ignore) == 0; + device->has_mem_available = get_available_system_memory() != 0; device->always_flush_cache = driQueryOptionb(&instance->dri_options, "always_flush_cache"); - /* Starting with Gen10, the timestamp frequency of the command streamer may - * vary from one part to another. We can query the value from the kernel. 
- */ - if (device->info.gen >= 10) { - int timestamp_frequency = - anv_gem_get_param(fd, I915_PARAM_CS_TIMESTAMP_FREQUENCY); - - if (timestamp_frequency < 0) - intel_logw("Kernel 4.16-rc1+ required to properly query CS timestamp frequency"); - else - device->info.timestamp_frequency = timestamp_frequency; - } + device->has_mmap_offset = + anv_gem_get_param(fd, I915_PARAM_MMAP_GTT_VERSION) >= 4; /* GENs prior to 8 do not support EU/Subslice info */ if (device->info.gen >= 8) { @@ -603,6 +607,7 @@ anv_physical_device_destroy(struct anv_physical_device *device) close(device->local_fd); if (device->master_fd >= 0) close(device->master_fd); + vk_object_base_finish(&device->base); vk_free(&device->instance->alloc, device); } @@ -684,7 +689,7 @@ VkResult anv_CreateInstance( if (!instance) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - instance->_loader_data.loaderMagic = ICD_LOADER_MAGIC; + vk_object_base_init(NULL, &instance->base, VK_OBJECT_TYPE_INSTANCE); if (pAllocator) instance->alloc = *pAllocator; @@ -804,6 +809,7 @@ void anv_DestroyInstance( driDestroyOptionCache(&instance->dri_options); driDestroyOptionInfo(&instance->available_dri_options); + vk_object_base_finish(&instance->base); vk_free(&instance->alloc, instance); } @@ -949,9 +955,9 @@ void anv_GetPhysicalDeviceFeatures( .shaderClipDistance = true, .shaderCullDistance = true, .shaderFloat64 = pdevice->info.gen >= 8 && - pdevice->info.has_64bit_types, + pdevice->info.has_64bit_float, .shaderInt64 = pdevice->info.gen >= 8 && - pdevice->info.has_64bit_types, + pdevice->info.has_64bit_int, .shaderInt16 = pdevice->info.gen >= 8, .shaderResourceMinLod = pdevice->info.gen >= 9, .variableMultisampleRate = true, @@ -1129,6 +1135,14 @@ void anv_GetPhysicalDeviceFeatures2( break; } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT: { + VkPhysicalDeviceCustomBorderColorFeaturesEXT *features = + (VkPhysicalDeviceCustomBorderColorFeaturesEXT *)ext; + features->customBorderColors = pdevice->info.gen >= 8; + features->customBorderColorWithoutFormat = pdevice->info.gen >= 8; + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_ENABLE_FEATURES_EXT: { VkPhysicalDeviceDepthClipEnableFeaturesEXT *features = (VkPhysicalDeviceDepthClipEnableFeaturesEXT *)ext; @@ -1236,6 +1250,22 @@ void anv_GetPhysicalDeviceFeatures2( break; } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PERFORMANCE_QUERY_FEATURES_KHR: { + VkPhysicalDevicePerformanceQueryFeaturesKHR *feature = + (VkPhysicalDevicePerformanceQueryFeaturesKHR *)ext; + feature->performanceCounterQueryPools = true; + /* HW only supports a single configuration at a time. 
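+       * (The OA unit holds one global metric-set configuration, so two
+       * pools recording with different configurations in the same
+       * submission could not both return valid results.)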
*/ + feature->performanceCounterMultipleQueryPools = false; + break; + } + + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_CREATION_CACHE_CONTROL_FEATURES_EXT: { + VkPhysicalDevicePipelineCreationCacheControlFeaturesEXT *features = + (VkPhysicalDevicePipelineCreationCacheControlFeaturesEXT *)ext; + features->pipelineCreationCacheControl = true; + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_EXECUTABLE_PROPERTIES_FEATURES_KHR: { VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR *features = (VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR *)ext; @@ -1243,12 +1273,26 @@ void anv_GetPhysicalDeviceFeatures2( break; } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRIVATE_DATA_FEATURES_EXT: { + VkPhysicalDevicePrivateDataFeaturesEXT *features = (void *)ext; + features->privateData = true; + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_FEATURES: { VkPhysicalDeviceProtectedMemoryFeatures *features = (void *)ext; CORE_FEATURE(1, 1, protectedMemory); break; } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT: { + VkPhysicalDeviceRobustness2FeaturesEXT *features = (void *)ext; + features->robustBufferAccess2 = true; + features->robustImageAccess2 = true; + features->nullDescriptor = true; + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_YCBCR_CONVERSION_FEATURES: { VkPhysicalDeviceSamplerYcbcrConversionFeatures *features = (VkPhysicalDeviceSamplerYcbcrConversionFeatures *) ext; @@ -1393,6 +1437,8 @@ void anv_GetPhysicalDeviceFeatures2( #define MAX_PER_STAGE_DESCRIPTOR_INPUT_ATTACHMENTS 64 #define MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS 256 +#define MAX_CUSTOM_BORDER_COLORS 4096 + void anv_GetPhysicalDeviceProperties( VkPhysicalDevice physicalDevice, VkPhysicalDeviceProperties* pProperties) @@ -1420,7 +1466,8 @@ void anv_GetPhysicalDeviceProperties( pdevice->has_bindless_images && pdevice->has_a64_buffer_access ? UINT32_MAX : MAX_BINDING_TABLE_SIZE - MAX_RTS - 1; - const uint32_t max_workgroup_size = 32 * devinfo->max_cs_threads; + /* Limit max_threads to 64 for the GPGPU_WALKER command */ + const uint32_t max_workgroup_size = 32 * MIN2(64, devinfo->max_cs_threads); VkSampleCountFlags sample_counts = isl_device_get_sample_counts(&pdevice->isl_dev); @@ -1502,8 +1549,7 @@ void anv_GetPhysicalDeviceProperties( * case of R32G32B32A32 which is 16 bytes. 
*/ .minTexelBufferOffsetAlignment = 16, - /* We need 16 for UBO block reads to work and 32 for push UBOs */ - .minUniformBufferOffsetAlignment = 32, + .minUniformBufferOffsetAlignment = ANV_UBO_ALIGNMENT, .minStorageBufferOffsetAlignment = 4, .minTexelOffset = -8, .maxTexelOffset = 7, @@ -1748,6 +1794,13 @@ void anv_GetPhysicalDeviceProperties2( vk_foreach_struct(ext, pProperties->pNext) { switch (ext->sType) { + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_PROPERTIES_EXT: { + VkPhysicalDeviceCustomBorderColorPropertiesEXT *properties = + (VkPhysicalDeviceCustomBorderColorPropertiesEXT *)ext; + properties->maxCustomBorderColorSamplers = MAX_CUSTOM_BORDER_COLORS; + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_STENCIL_RESOLVE_PROPERTIES_KHR: { VkPhysicalDeviceDepthStencilResolvePropertiesKHR *properties = (VkPhysicalDeviceDepthStencilResolvePropertiesKHR *)ext; @@ -1878,6 +1931,16 @@ void anv_GetPhysicalDeviceProperties2( break; } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PERFORMANCE_QUERY_PROPERTIES_KHR: { + VkPhysicalDevicePerformanceQueryPropertiesKHR *properties = + (VkPhysicalDevicePerformanceQueryPropertiesKHR *)ext; + /* We could support this by spawning a shader to do the equation + * normalization. + */ + properties->allowCommandBufferQueryCopies = false; + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_POINT_CLIPPING_PROPERTIES: { VkPhysicalDevicePointClippingProperties *properties = (VkPhysicalDevicePointClippingProperties *) ext; @@ -1909,6 +1972,15 @@ void anv_GetPhysicalDeviceProperties2( break; } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_PROPERTIES_EXT: { + VkPhysicalDeviceRobustness2PropertiesEXT *properties = (void *)ext; + properties->robustStorageBufferAccessSizeAlignment = + ANV_SSBO_BOUNDS_CHECK_ALIGNMENT; + properties->robustUniformBufferAccessSizeAlignment = + ANV_UBO_ALIGNMENT; + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_FILTER_MINMAX_PROPERTIES_EXT: { VkPhysicalDeviceSamplerFilterMinmaxPropertiesEXT *properties = (VkPhysicalDeviceSamplerFilterMinmaxPropertiesEXT *)ext; @@ -2212,6 +2284,11 @@ PFN_vkVoidFunction anv_GetInstanceProcAddr( LOOKUP_ANV_ENTRYPOINT(EnumerateInstanceVersion); LOOKUP_ANV_ENTRYPOINT(CreateInstance); + /* GetInstanceProcAddr() can also be called with a NULL instance. + * See https://gitlab.khronos.org/vulkan/vulkan/issues/2057 + */ + LOOKUP_ANV_ENTRYPOINT(GetInstanceProcAddr); + #undef LOOKUP_ANV_ENTRYPOINT if (instance == NULL) @@ -2337,34 +2414,6 @@ anv_state_pool_emit_data(struct anv_state_pool *pool, size_t size, size_t align, return state; } -/* Haswell border color is a bit of a disaster. Float and unorm formats use a - * straightforward 32-bit float color in the first 64 bytes. Instead of using - * a nice float/integer union like Gen8+, Haswell specifies the integer border - * color as a separate entry /after/ the float color. The layout of this entry - * also depends on the format's bpp (with extra hacks for RG32), and overlaps. - * - * Since we don't know the format/bpp, we can't make any of the border colors - * containing '1' work for all formats, as it would be in the wrong place for - * some of them. We opt to make 32-bit integers work as this seems like the - * most common option. Fortunately, transparent black works regardless, as - * all zeroes is the same in every bit-size. 
- */ -struct hsw_border_color { - float float32[4]; - uint32_t _pad0[12]; - uint32_t uint32[4]; - uint32_t _pad1[108]; -}; - -struct gen8_border_color { - union { - float float32[4]; - uint32_t uint32[4]; - }; - /* Pad out to 64 bytes */ - uint32_t _pad[12]; -}; - static void anv_device_init_border_colors(struct anv_device *device) { @@ -2442,55 +2491,6 @@ VkResult anv_EnumerateDeviceExtensionProperties( return vk_outarray_status(&out); } -static void -anv_device_init_dispatch(struct anv_device *device) -{ - const struct anv_instance *instance = device->physical->instance; - - const struct anv_device_dispatch_table *genX_table; - switch (device->info.gen) { - case 12: - genX_table = &gen12_device_dispatch_table; - break; - case 11: - genX_table = &gen11_device_dispatch_table; - break; - case 10: - genX_table = &gen10_device_dispatch_table; - break; - case 9: - genX_table = &gen9_device_dispatch_table; - break; - case 8: - genX_table = &gen8_device_dispatch_table; - break; - case 7: - if (device->info.is_haswell) - genX_table = &gen75_device_dispatch_table; - else - genX_table = &gen7_device_dispatch_table; - break; - default: - unreachable("unsupported gen\n"); - } - - for (unsigned i = 0; i < ARRAY_SIZE(device->dispatch.entrypoints); i++) { - /* Vulkan requires that entrypoints for extensions which have not been - * enabled must not be advertised. - */ - if (!anv_device_entrypoint_is_enabled(i, instance->app_info.api_version, - &instance->enabled_extensions, - &device->enabled_extensions)) { - device->dispatch.entrypoints[i] = NULL; - } else if (genX_table->entrypoints[i]) { - device->dispatch.entrypoints[i] = genX_table->entrypoints[i]; - } else { - device->dispatch.entrypoints[i] = - anv_device_dispatch_table.entrypoints[i]; - } - } -} - static int vk_priority_to_gen(int priority) { @@ -2628,6 +2628,23 @@ static struct gen_mapped_pinned_buffer_alloc aux_map_allocator = { .free = gen_aux_map_buffer_free, }; +static VkResult +check_physical_device_features(VkPhysicalDevice physicalDevice, + const VkPhysicalDeviceFeatures *features) +{ + VkPhysicalDeviceFeatures supported_features; + anv_GetPhysicalDeviceFeatures(physicalDevice, &supported_features); + VkBool32 *supported_feature = (VkBool32 *)&supported_features; + VkBool32 *enabled_feature = (VkBool32 *)features; + unsigned num_features = sizeof(VkPhysicalDeviceFeatures) / sizeof(VkBool32); + for (uint32_t i = 0; i < num_features; i++) { + if (enabled_feature[i] && !supported_feature[i]) + return vk_error(VK_ERROR_FEATURE_NOT_PRESENT); + } + + return VK_SUCCESS; +} + VkResult anv_CreateDevice( VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo* pCreateInfo, @@ -2659,15 +2676,34 @@ VkResult anv_CreateDevice( } /* Check enabled features */ + bool robust_buffer_access = false; if (pCreateInfo->pEnabledFeatures) { - VkPhysicalDeviceFeatures supported_features; - anv_GetPhysicalDeviceFeatures(physicalDevice, &supported_features); - VkBool32 *supported_feature = (VkBool32 *)&supported_features; - VkBool32 *enabled_feature = (VkBool32 *)pCreateInfo->pEnabledFeatures; - unsigned num_features = sizeof(VkPhysicalDeviceFeatures) / sizeof(VkBool32); - for (uint32_t i = 0; i < num_features; i++) { - if (enabled_feature[i] && !supported_feature[i]) - return vk_error(VK_ERROR_FEATURE_NOT_PRESENT); + result = check_physical_device_features(physicalDevice, + pCreateInfo->pEnabledFeatures); + if (result != VK_SUCCESS) + return result; + + if (pCreateInfo->pEnabledFeatures->robustBufferAccess) + robust_buffer_access = true; + } + + 
vk_foreach_struct_const(ext, pCreateInfo->pNext) { + switch (ext->sType) { + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2: { + const VkPhysicalDeviceFeatures2 *features = (const void *)ext; + result = check_physical_device_features(physicalDevice, + &features->features); + if (result != VK_SUCCESS) + return result; + + if (features->features.robustBufferAccess) + robust_buffer_access = true; + break; + } + + default: + /* Don't warn */ + break; } } @@ -2695,6 +2731,9 @@ VkResult anv_CreateDevice( if (!device) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + vk_device_init(&device->vk, pCreateInfo, + &physical_device->instance->alloc, pAllocator); + if (INTEL_DEBUG & DEBUG_BATCH) { const unsigned decode_flags = GEN_BATCH_DECODE_FULL | @@ -2708,16 +2747,10 @@ VkResult anv_CreateDevice( decode_get_bo, NULL, device); } - device->_loader_data.loaderMagic = ICD_LOADER_MAGIC; device->physical = physical_device; device->no_hw = physical_device->no_hw; device->_lost = false; - if (pAllocator) - device->alloc = *pAllocator; - else - device->alloc = physical_device->instance->alloc; - /* XXX(chadv): Can we dup() physicalDevice->fd here? */ device->fd = open(physical_device->path, O_RDWR | O_CLOEXEC); if (device->fd == -1) { @@ -2784,11 +2817,23 @@ VkResult anv_CreateDevice( */ device->can_chain_batches = device->info.gen >= 8; - device->robust_buffer_access = pCreateInfo->pEnabledFeatures && - pCreateInfo->pEnabledFeatures->robustBufferAccess; + device->robust_buffer_access = robust_buffer_access; device->enabled_extensions = enabled_extensions; - anv_device_init_dispatch(device); + const struct anv_instance *instance = physical_device->instance; + for (unsigned i = 0; i < ARRAY_SIZE(device->dispatch.entrypoints); i++) { + /* Vulkan requires that entrypoints for extensions which have not been + * enabled must not be advertised. + */ + if (!anv_device_entrypoint_is_enabled(i, instance->app_info.api_version, + &instance->enabled_extensions, + &device->enabled_extensions)) { + device->dispatch.entrypoints[i] = NULL; + } else { + device->dispatch.entrypoints[i] = + anv_resolve_device_entrypoint(&device->info, i); + } + } if (pthread_mutex_init(&device->mutex, NULL) != 0) { result = vk_error(VK_ERROR_INITIALIZATION_FAILED); @@ -2819,44 +2864,87 @@ VkResult anv_CreateDevice( anv_bo_pool_init(&device->batch_bo_pool, device); result = anv_state_pool_init(&device->dynamic_state_pool, device, - DYNAMIC_STATE_POOL_MIN_ADDRESS, 16384); + DYNAMIC_STATE_POOL_MIN_ADDRESS, 0, 16384); if (result != VK_SUCCESS) goto fail_batch_bo_pool; + if (device->info.gen >= 8) { + /* The border color pointer is limited to 24 bits, so we need to make + * sure that any such color used at any point in the program doesn't + * exceed that limit. + * We achieve that by reserving all the custom border colors we support + * right off the bat, so they are close to the base address. 
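+       * (4096 colors at 64 bytes apiece is only 256 KiB, comfortably
+       * inside the 16 MiB window a 24-bit pointer can address.)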
+ */ + anv_state_reserved_pool_init(&device->custom_border_colors, + &device->dynamic_state_pool, + sizeof(struct gen8_border_color), + MAX_CUSTOM_BORDER_COLORS, 64); + } + result = anv_state_pool_init(&device->instruction_state_pool, device, - INSTRUCTION_STATE_POOL_MIN_ADDRESS, 16384); + INSTRUCTION_STATE_POOL_MIN_ADDRESS, 0, 16384); if (result != VK_SUCCESS) goto fail_dynamic_state_pool; result = anv_state_pool_init(&device->surface_state_pool, device, - SURFACE_STATE_POOL_MIN_ADDRESS, 4096); + SURFACE_STATE_POOL_MIN_ADDRESS, 0, 4096); if (result != VK_SUCCESS) goto fail_instruction_state_pool; if (physical_device->use_softpin) { + int64_t bt_pool_offset = (int64_t)BINDING_TABLE_POOL_MIN_ADDRESS - + (int64_t)SURFACE_STATE_POOL_MIN_ADDRESS; + assert(INT32_MIN < bt_pool_offset && bt_pool_offset < 0); result = anv_state_pool_init(&device->binding_table_pool, device, - BINDING_TABLE_POOL_MIN_ADDRESS, 4096); + SURFACE_STATE_POOL_MIN_ADDRESS, + bt_pool_offset, 4096); if (result != VK_SUCCESS) goto fail_surface_state_pool; } - if (device->info.gen >= 12) { + if (device->info.has_aux_map) { device->aux_map_ctx = gen_aux_map_init(device, &aux_map_allocator, &physical_device->info); if (!device->aux_map_ctx) goto fail_binding_table_pool; } - result = anv_device_alloc_bo(device, 4096, 0 /* flags */, + result = anv_device_alloc_bo(device, 4096, + ANV_BO_ALLOC_CAPTURE | ANV_BO_ALLOC_MAPPED /* flags */, 0 /* explicit_address */, &device->workaround_bo); if (result != VK_SUCCESS) goto fail_surface_aux_map_pool; + device->workaround_address = (struct anv_address) { + .bo = device->workaround_bo, + .offset = align_u32( + intel_debug_write_identifiers(device->workaround_bo->map, + device->workaround_bo->size, + "Anv") + 8, 8), + }; + + if (!device->info.has_llc) { + gen_clflush_range(device->workaround_bo->map, + device->workaround_address.offset); + } + result = anv_device_init_trivial_batch(device); if (result != VK_SUCCESS) goto fail_workaround_bo; + /* Allocate a null surface state at surface state offset 0. This makes + * NULL descriptor handling trivial because we can just memset structures + * to zero and they have a valid descriptor. 
+ */ + device->null_surface_state = + anv_state_pool_alloc(&device->surface_state_pool, + device->isl_dev.ss.size, + device->isl_dev.ss.align); + isl_null_fill_state(&device->isl_dev, device->null_surface_state.map, + isl_extent3d(1, 1, 1) /* This shouldn't matter */); + assert(device->null_surface_state.offset == 0); + if (device->info.gen >= 10) { result = anv_device_init_hiz_clear_value_bo(device); if (result != VK_SUCCESS) @@ -2893,9 +2981,10 @@ VkResult anv_CreateDevice( unreachable("unhandled gen"); } if (result != VK_SUCCESS) - goto fail_workaround_bo; + goto fail_clear_value_bo; - anv_pipeline_cache_init(&device->default_pipeline_cache, device, true); + anv_pipeline_cache_init(&device->default_pipeline_cache, device, + true /* cache_enabled */, false /* external_sync */); anv_device_init_blorp(device); @@ -2907,15 +2996,16 @@ VkResult anv_CreateDevice( return VK_SUCCESS; - fail_workaround_bo: - anv_scratch_pool_finish(device, &device->scratch_pool); + fail_clear_value_bo: if (device->info.gen >= 10) anv_device_release_bo(device, device->hiz_clear_bo); - anv_device_release_bo(device, device->workaround_bo); + anv_scratch_pool_finish(device, &device->scratch_pool); fail_trivial_batch_bo: anv_device_release_bo(device, device->trivial_batch_bo); + fail_workaround_bo: + anv_device_release_bo(device, device->workaround_bo); fail_surface_aux_map_pool: - if (device->info.gen >= 12) { + if (device->info.has_aux_map) { gen_aux_map_finish(device->aux_map_ctx); device->aux_map_ctx = NULL; } @@ -2927,6 +3017,8 @@ VkResult anv_CreateDevice( fail_instruction_state_pool: anv_state_pool_finish(&device->instruction_state_pool); fail_dynamic_state_pool: + if (device->info.gen >= 8) + anv_state_reserved_pool_finish(&device->custom_border_colors); anv_state_pool_finish(&device->dynamic_state_pool); fail_batch_bo_pool: anv_bo_pool_finish(&device->batch_bo_pool); @@ -2948,7 +3040,7 @@ VkResult anv_CreateDevice( fail_fd: close(device->fd); fail_device: - vk_free(&device->alloc, device); + vk_free(&device->vk.alloc, device); return result; } @@ -2972,6 +3064,8 @@ void anv_DestroyDevice( /* We only need to free these to prevent valgrind errors. The backing * BO will go away in a couple of lines so we don't actually leak. */ + if (device->info.gen >= 8) + anv_state_reserved_pool_finish(&device->custom_border_colors); anv_state_pool_free(&device->dynamic_state_pool, device->border_colors); anv_state_pool_free(&device->dynamic_state_pool, device->slice_hash); #endif @@ -2983,7 +3077,7 @@ void anv_DestroyDevice( if (device->info.gen >= 10) anv_device_release_bo(device, device->hiz_clear_bo); - if (device->info.gen >= 12) { + if (device->info.has_aux_map) { gen_aux_map_finish(device->aux_map_ctx); device->aux_map_ctx = NULL; } @@ -3014,7 +3108,8 @@ void anv_DestroyDevice( close(device->fd); - vk_free(&device->alloc, device); + vk_device_finish(&device->vk); + vk_free(&device->vk.alloc, device); } VkResult anv_EnumerateInstanceLayerProperties( @@ -3201,40 +3296,24 @@ VkResult anv_DeviceWaitIdle( return anv_queue_submit_simple_batch(&device->queue, NULL); } -bool -anv_vma_alloc(struct anv_device *device, struct anv_bo *bo, +uint64_t +anv_vma_alloc(struct anv_device *device, + uint64_t size, uint64_t align, + enum anv_bo_alloc_flags alloc_flags, uint64_t client_address) { - const struct gen_device_info *devinfo = &device->info; - /* Gen12 CCS surface addresses need to be 64K aligned. We have no way of - * telling what this allocation is for so pick the largest alignment. 
- */ - const uint32_t vma_alignment = - devinfo->gen >= 12 ? (64 * 1024) : (4 * 1024); - - if (!(bo->flags & EXEC_OBJECT_PINNED)) { - assert(!(bo->has_client_visible_address)); - return true; - } - pthread_mutex_lock(&device->vma_mutex); - bo->offset = 0; + uint64_t addr = 0; - if (bo->has_client_visible_address) { - assert(bo->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS); + if (alloc_flags & ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS) { if (client_address) { if (util_vma_heap_alloc_addr(&device->vma_cva, - client_address, bo->size)) { - bo->offset = gen_canonical_address(client_address); + client_address, size)) { + addr = client_address; } } else { - uint64_t addr = - util_vma_heap_alloc(&device->vma_cva, bo->size, vma_alignment); - if (addr) { - bo->offset = gen_canonical_address(addr); - assert(addr == gen_48b_address(bo->offset)); - } + addr = util_vma_heap_alloc(&device->vma_cva, size, align); } /* We don't want to fall back to other heaps */ goto done; @@ -3242,54 +3321,39 @@ anv_vma_alloc(struct anv_device *device, struct anv_bo *bo, assert(client_address == 0); - if (bo->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) { - uint64_t addr = - util_vma_heap_alloc(&device->vma_hi, bo->size, vma_alignment); - if (addr) { - bo->offset = gen_canonical_address(addr); - assert(addr == gen_48b_address(bo->offset)); - } - } + if (!(alloc_flags & ANV_BO_ALLOC_32BIT_ADDRESS)) + addr = util_vma_heap_alloc(&device->vma_hi, size, align); - if (bo->offset == 0) { - uint64_t addr = - util_vma_heap_alloc(&device->vma_lo, bo->size, vma_alignment); - if (addr) { - bo->offset = gen_canonical_address(addr); - assert(addr == gen_48b_address(bo->offset)); - } - } + if (addr == 0) + addr = util_vma_heap_alloc(&device->vma_lo, size, align); done: pthread_mutex_unlock(&device->vma_mutex); - return bo->offset != 0; + assert(addr == gen_48b_address(addr)); + return gen_canonical_address(addr); } void -anv_vma_free(struct anv_device *device, struct anv_bo *bo) +anv_vma_free(struct anv_device *device, + uint64_t address, uint64_t size) { - if (!(bo->flags & EXEC_OBJECT_PINNED)) - return; - - const uint64_t addr_48b = gen_48b_address(bo->offset); + const uint64_t addr_48b = gen_48b_address(address); pthread_mutex_lock(&device->vma_mutex); if (addr_48b >= LOW_HEAP_MIN_ADDRESS && addr_48b <= LOW_HEAP_MAX_ADDRESS) { - util_vma_heap_free(&device->vma_lo, addr_48b, bo->size); + util_vma_heap_free(&device->vma_lo, addr_48b, size); } else if (addr_48b >= CLIENT_VISIBLE_HEAP_MIN_ADDRESS && addr_48b <= CLIENT_VISIBLE_HEAP_MAX_ADDRESS) { - util_vma_heap_free(&device->vma_cva, addr_48b, bo->size); + util_vma_heap_free(&device->vma_cva, addr_48b, size); } else { assert(addr_48b >= HIGH_HEAP_MIN_ADDRESS); - util_vma_heap_free(&device->vma_hi, addr_48b, bo->size); + util_vma_heap_free(&device->vma_hi, addr_48b, size); } pthread_mutex_unlock(&device->vma_mutex); - - bo->offset = 0; } VkResult anv_AllocateMemory( @@ -3325,12 +3389,13 @@ VkResult anv_AllocateMemory( if (mem_heap_used + aligned_alloc_size > mem_heap->size) return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY); - mem = vk_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8, + mem = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*mem), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (mem == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); assert(pAllocateInfo->memoryTypeIndex < pdevice->memory.type_count); + vk_object_base_init(&device->vk, &mem->base, VK_OBJECT_TYPE_DEVICE_MEMORY); mem->type = mem_type; mem->map = NULL; mem->map_size = 0; @@ -3388,9 +3453,27 @@ VkResult anv_AllocateMemory( } 
} + /* By default, we want all VkDeviceMemory objects to support CCS */ + if (device->physical->has_implicit_ccs) + alloc_flags |= ANV_BO_ALLOC_IMPLICIT_CCS; + if (vk_flags & VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT_KHR) alloc_flags |= ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS; + if ((export_info && export_info->handleTypes) || + (fd_info && fd_info->handleType) || + (host_ptr_info && host_ptr_info->handleType)) { + /* Anything imported or exported is EXTERNAL */ + alloc_flags |= ANV_BO_ALLOC_EXTERNAL; + + /* We can't have implicit CCS on external memory with an AUX-table. + * Doing so would require us to sync the aux tables across processes + * which is impractical. + */ + if (device->info.has_aux_map) + alloc_flags &= ~ANV_BO_ALLOC_IMPLICIT_CCS; + } + /* Check if we need to support Android HW buffer export. If so, * create AHardwareBuffer and import memory from it. */ @@ -3435,9 +3518,6 @@ VkResult anv_AllocateMemory( if (result != VK_SUCCESS) goto fail; - VkDeviceSize aligned_alloc_size = - align_u64(pAllocateInfo->allocationSize, 4096); - /* For security purposes, we reject importing the bo if it's smaller * than the requested allocation size. This prevents a malicious client * from passing a buffer to a trusted client, lying about the size, and @@ -3494,9 +3574,6 @@ VkResult anv_AllocateMemory( /* Regular allocate (not importing memory). */ - if (export_info && export_info->handleTypes) - alloc_flags |= ANV_BO_ALLOC_EXTERNAL; - result = anv_device_alloc_bo(device, pAllocateInfo->allocationSize, alloc_flags, client_address, &mem->bo); if (result != VK_SUCCESS) @@ -3542,7 +3619,7 @@ VkResult anv_AllocateMemory( return VK_SUCCESS; fail: - vk_free2(&device->alloc, pAllocator, mem); + vk_free2(&device->vk.alloc, pAllocator, mem); return result; } @@ -3642,7 +3719,8 @@ void anv_FreeMemory( AHardwareBuffer_release(mem->ahw); #endif - vk_free2(&device->alloc, pAllocator, mem); + vk_object_base_finish(&mem->base); + vk_free2(&device->vk.alloc, pAllocator, mem); } VkResult anv_MapMemory( @@ -3692,7 +3770,11 @@ VkResult anv_MapMemory( gem_flags |= I915_MMAP_WC; /* GEM will fail to map if the offset isn't 4k-aligned. Round down. */ - uint64_t map_offset = offset & ~4095ull; + uint64_t map_offset; + if (!device->physical->has_mmap_offset) + map_offset = offset & ~4095ull; + else + map_offset = 0; assert(offset >= map_offset); uint64_t map_size = (offset + size) - map_offset; @@ -3716,12 +3798,13 @@ void anv_UnmapMemory( VkDevice _device, VkDeviceMemory _memory) { + ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_device_memory, mem, _memory); if (mem == NULL || mem->host_ptr) return; - anv_gem_munmap(mem->map, mem->map_size); + anv_gem_munmap(device, mem->map, mem->map_size); mem->map = NULL; mem->map_size = 0; @@ -3798,9 +3881,8 @@ void anv_GetBufferMemoryRequirements( /* Base alignment requirement of a cache line */ uint32_t alignment = 16; - /* We need an alignment of 32 for pushing UBOs */ if (buffer->usage & VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT) - alignment = MAX2(alignment, 32); + alignment = MAX2(alignment, ANV_UBO_ALIGNMENT); pMemoryRequirements->size = buffer->size; pMemoryRequirements->alignment = alignment; @@ -3861,12 +3943,6 @@ void anv_GetImageMemoryRequirements( */ uint32_t memory_types = (1ull << device->physical->memory.type_count) - 1; - /* We must have image allocated or imported at this point. According to the - * specification, external images must have been bound to memory before - * calling GetImageMemoryRequirements. 
- */ - assert(image->size > 0); - pMemoryRequirements->size = image->size; pMemoryRequirements->alignment = image->alignment; pMemoryRequirements->memoryTypeBits = memory_types; @@ -3906,12 +3982,6 @@ void anv_GetImageMemoryRequirements2( pMemoryRequirements->memoryRequirements.memoryTypeBits = (1ull << device->physical->memory.type_count) - 1; - /* We must have image allocated or imported at this point. According to the - * specification, external images must have been bound to memory before - * calling GetImageMemoryRequirements. - */ - assert(image->planes[plane].size > 0); - pMemoryRequirements->memoryRequirements.size = image->planes[plane].size; pMemoryRequirements->memoryRequirements.alignment = image->planes[plane].alignment; @@ -4044,22 +4114,19 @@ VkResult anv_CreateEvent( VkEvent* pEvent) { ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_state state; struct anv_event *event; assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_EVENT_CREATE_INFO); - state = anv_state_pool_alloc(&device->dynamic_state_pool, - sizeof(*event), 8); - event = state.map; - event->state = state; - event->semaphore = VK_EVENT_RESET; + event = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*event), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (event == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - if (!device->info.has_llc) { - /* Make sure the writes we're flushing have landed. */ - __builtin_ia32_mfence(); - __builtin_ia32_clflush(event); - } + vk_object_base_init(&device->vk, &event->base, VK_OBJECT_TYPE_EVENT); + event->state = anv_state_pool_alloc(&device->dynamic_state_pool, + sizeof(uint64_t), 8); + *(uint64_t *)event->state.map = VK_EVENT_RESET; *pEvent = anv_event_to_handle(event); @@ -4078,6 +4145,9 @@ void anv_DestroyEvent( return; anv_state_pool_free(&device->dynamic_state_pool, event->state); + + vk_object_base_finish(&event->base); + vk_free2(&device->vk.alloc, pAllocator, event); } VkResult anv_GetEventStatus( @@ -4090,30 +4160,16 @@ VkResult anv_GetEventStatus( if (anv_device_is_lost(device)) return VK_ERROR_DEVICE_LOST; - if (!device->info.has_llc) { - /* Invalidate read cache before reading event written by GPU. */ - __builtin_ia32_clflush(event); - __builtin_ia32_mfence(); - - } - - return event->semaphore; + return *(uint64_t *)event->state.map; } VkResult anv_SetEvent( VkDevice _device, VkEvent _event) { - ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_event, event, _event); - event->semaphore = VK_EVENT_SET; - - if (!device->info.has_llc) { - /* Make sure the writes we're flushing have landed. */ - __builtin_ia32_mfence(); - __builtin_ia32_clflush(event); - } + *(uint64_t *)event->state.map = VK_EVENT_SET; return VK_SUCCESS; } @@ -4122,16 +4178,9 @@ VkResult anv_ResetEvent( VkDevice _device, VkEvent _event) { - ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_event, event, _event); - event->semaphore = VK_EVENT_RESET; - - if (!device->info.has_llc) { - /* Make sure the writes we're flushing have landed. 
*/ - __builtin_ia32_mfence(); - __builtin_ia32_clflush(event); - } + *(uint64_t *)event->state.map = VK_EVENT_RESET; return VK_SUCCESS; } @@ -4157,11 +4206,12 @@ VkResult anv_CreateBuffer( assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO); - buffer = vk_alloc2(&device->alloc, pAllocator, sizeof(*buffer), 8, + buffer = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*buffer), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (buffer == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + vk_object_base_init(&device->vk, &buffer->base, VK_OBJECT_TYPE_BUFFER); buffer->size = pCreateInfo->size; buffer->usage = pCreateInfo->usage; buffer->address = ANV_NULL_ADDRESS; @@ -4182,7 +4232,8 @@ void anv_DestroyBuffer( if (!buffer) return; - vk_free2(&device->alloc, pAllocator, buffer); + vk_object_base_finish(&buffer->base); + vk_free2(&device->vk.alloc, pAllocator, buffer); } VkDeviceAddress anv_GetBufferDeviceAddress( @@ -4247,7 +4298,13 @@ void anv_DestroySampler( sampler->bindless_state); } - vk_free2(&device->alloc, pAllocator, sampler); + if (sampler->custom_border_color.map) { + anv_state_reserved_pool_free(&device->custom_border_colors, + sampler->custom_border_color); + } + + vk_object_base_finish(&sampler->base); + vk_free2(&device->vk.alloc, pAllocator, sampler); } VkResult anv_CreateFramebuffer( @@ -4270,7 +4327,7 @@ VkResult anv_CreateFramebuffer( */ if (!(pCreateInfo->flags & VK_FRAMEBUFFER_CREATE_IMAGELESS_BIT_KHR)) { size += sizeof(struct anv_image_view *) * pCreateInfo->attachmentCount; - framebuffer = vk_alloc2(&device->alloc, pAllocator, size, 8, + framebuffer = vk_alloc2(&device->vk.alloc, pAllocator, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (framebuffer == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); @@ -4281,8 +4338,7 @@ VkResult anv_CreateFramebuffer( } framebuffer->attachment_count = pCreateInfo->attachmentCount; } else { - assert(device->enabled_extensions.KHR_imageless_framebuffer); - framebuffer = vk_alloc2(&device->alloc, pAllocator, size, 8, + framebuffer = vk_alloc2(&device->vk.alloc, pAllocator, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (framebuffer == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); @@ -4290,6 +4346,9 @@ VkResult anv_CreateFramebuffer( framebuffer->attachment_count = 0; } + vk_object_base_init(&device->vk, &framebuffer->base, + VK_OBJECT_TYPE_FRAMEBUFFER); + framebuffer->width = pCreateInfo->width; framebuffer->height = pCreateInfo->height; framebuffer->layers = pCreateInfo->layers; @@ -4310,7 +4369,8 @@ void anv_DestroyFramebuffer( if (!fb) return; - vk_free2(&device->alloc, pAllocator, fb); + vk_object_base_finish(&fb->base); + vk_free2(&device->vk.alloc, pAllocator, fb); } static const VkTimeDomainEXT anv_time_domains[] = { @@ -4351,8 +4411,6 @@ anv_clock_gettime(clockid_t clock_id) return (uint64_t) current.tv_sec * 1000000000ULL + current.tv_nsec; } -#define TIMESTAMP 0x2358 - VkResult anv_GetCalibratedTimestampsEXT( VkDevice _device, uint32_t timestampCount, @@ -4372,7 +4430,7 @@ VkResult anv_GetCalibratedTimestampsEXT( for (d = 0; d < timestampCount; d++) { switch (pTimestampInfos[d].timeDomain) { case VK_TIME_DOMAIN_DEVICE_EXT: - ret = anv_gem_reg_read(device, TIMESTAMP | 1, + ret = anv_gem_reg_read(device->fd, TIMESTAMP | I915_REG_READ_8B_WA, &pTimestamps[d]); if (ret != 0) { @@ -4486,3 +4544,49 @@ vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t* pSupportedVersion) *pSupportedVersion = MIN2(*pSupportedVersion, 4u); return VK_SUCCESS; } + +VkResult anv_CreatePrivateDataSlotEXT( + VkDevice _device, + const 
VkPrivateDataSlotCreateInfoEXT* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkPrivateDataSlotEXT* pPrivateDataSlot) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + return vk_private_data_slot_create(&device->vk, pCreateInfo, pAllocator, + pPrivateDataSlot); +} + +void anv_DestroyPrivateDataSlotEXT( + VkDevice _device, + VkPrivateDataSlotEXT privateDataSlot, + const VkAllocationCallbacks* pAllocator) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + vk_private_data_slot_destroy(&device->vk, privateDataSlot, pAllocator); +} + +VkResult anv_SetPrivateDataEXT( + VkDevice _device, + VkObjectType objectType, + uint64_t objectHandle, + VkPrivateDataSlotEXT privateDataSlot, + uint64_t data) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + return vk_object_base_set_private_data(&device->vk, + objectType, objectHandle, + privateDataSlot, data); +} + +void anv_GetPrivateDataEXT( + VkDevice _device, + VkObjectType objectType, + uint64_t objectHandle, + VkPrivateDataSlotEXT privateDataSlot, + uint64_t* pData) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + vk_object_base_get_private_data(&device->vk, + objectType, objectHandle, + privateDataSlot, pData); +}
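
Editor's note: the four entry points above only forward to the shared
vk_object private-data helpers, so the driver-side logic lives in common
code. For reference, a minimal application-side sketch of the extension
(standard VK_EXT_private_data API, whose signatures match the driver
entry points in this diff; the VkDevice `device` is assumed to exist and
error handling is elided):

   VkPrivateDataSlotCreateInfoEXT slot_info = {
      .sType = VK_STRUCTURE_TYPE_PRIVATE_DATA_SLOT_CREATE_INFO_EXT,
   };
   VkPrivateDataSlotEXT slot;
   vkCreatePrivateDataSlotEXT(device, &slot_info, NULL, &slot);

   /* Attach a 64-bit value to any Vulkan object, here the device itself. */
   vkSetPrivateDataEXT(device, VK_OBJECT_TYPE_DEVICE,
                       (uint64_t)(uintptr_t)device, slot, 0x1234);

   /* Read it back; the spec guarantees 0 if nothing was stored. */
   uint64_t value;
   vkGetPrivateDataEXT(device, VK_OBJECT_TYPE_DEVICE,
                       (uint64_t)(uintptr_t)device, slot, &value);

   vkDestroyPrivateDataSlotEXT(device, slot, NULL);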