radv/gfx10: add a separate flag for creating a GDS OA buffer
[mesa.git] / src / amd / vulkan / radv_device.c
index 7b4ba74c9770c8ec09148a8ba7028975deed5b5c..ef0e866ef52b12c8555e1b0bfcd2e9b6a8776103 100644 (file)
@@ -398,10 +398,6 @@ radv_physical_device_init(struct radv_physical_device *device,
        radv_handle_env_var_force_family(device);
 
        device->use_aco = instance->perftest_flags & RADV_PERFTEST_ACO;
-       if (device->rad_info.chip_class < GFX7 && device->use_aco) {
-               fprintf(stderr, "WARNING: disabling ACO on unsupported GPUs.\n");
-               device->use_aco = false;
-       }
 
        snprintf(device->name, sizeof(device->name),
                 "AMD RADV%s %s (LLVM " MESA_LLVM_VERSION_STRING ")", device->use_aco ? "/ACO" : "",
@@ -426,8 +422,7 @@ radv_physical_device_init(struct radv_physical_device *device,
        disk_cache_format_hex_id(buf, device->cache_uuid, VK_UUID_SIZE * 2);
        device->disk_cache = disk_cache_create(device->name, buf, shader_env_flags);
 
-       if (device->rad_info.chip_class < GFX8 ||
-           device->rad_info.chip_class > GFX9)
+       if (device->rad_info.chip_class < GFX8)
                fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n");
 
        radv_get_driver_uuid(&device->driver_uuid);
@@ -1219,6 +1214,58 @@ void radv_GetPhysicalDeviceFeatures2(
                        features->shaderDrawParameters = true;
                        break;
                }
+               case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES: {
+                       VkPhysicalDeviceVulkan12Features *features =
+                               (VkPhysicalDeviceVulkan12Features *)ext;
+                       features->samplerMirrorClampToEdge = true;
+                       features->drawIndirectCount = true;
+                       features->storageBuffer8BitAccess = pdevice->rad_info.chip_class >= GFX8 && !pdevice->use_aco;
+                       features->uniformAndStorageBuffer8BitAccess = pdevice->rad_info.chip_class >= GFX8 && !pdevice->use_aco;
+                       features->storagePushConstant8 = pdevice->rad_info.chip_class >= GFX8 && !pdevice->use_aco;
+                       features->shaderBufferInt64Atomics = LLVM_VERSION_MAJOR >= 9;
+                       features->shaderSharedInt64Atomics = LLVM_VERSION_MAJOR >= 9;
+                       features->shaderFloat16 = pdevice->rad_info.chip_class >= GFX8 && !pdevice->use_aco;
+                       features->shaderInt8 = !pdevice->use_aco;
+                       features->descriptorIndexing = true;
+                       features->shaderInputAttachmentArrayDynamicIndexing = true;
+                       features->shaderUniformTexelBufferArrayDynamicIndexing = true;
+                       features->shaderStorageTexelBufferArrayDynamicIndexing = true;
+                       features->shaderUniformBufferArrayNonUniformIndexing = true;
+                       features->shaderSampledImageArrayNonUniformIndexing = true;
+                       features->shaderStorageBufferArrayNonUniformIndexing = true;
+                       features->shaderStorageImageArrayNonUniformIndexing = true;
+                       features->shaderInputAttachmentArrayNonUniformIndexing = true;
+                       features->shaderUniformTexelBufferArrayNonUniformIndexing = true;
+                       features->shaderStorageTexelBufferArrayNonUniformIndexing = true;
+                       features->descriptorBindingUniformBufferUpdateAfterBind = true;
+                       features->descriptorBindingSampledImageUpdateAfterBind = true;
+                       features->descriptorBindingStorageImageUpdateAfterBind = true;
+                       features->descriptorBindingStorageBufferUpdateAfterBind = true;
+                       features->descriptorBindingUniformTexelBufferUpdateAfterBind = true;
+                       features->descriptorBindingStorageTexelBufferUpdateAfterBind = true;
+                       features->descriptorBindingUpdateUnusedWhilePending = true;
+                       features->descriptorBindingPartiallyBound = true;
+                       features->descriptorBindingVariableDescriptorCount = true;
+                       features->runtimeDescriptorArray = true;
+                       features->samplerFilterMinmax = pdevice->rad_info.chip_class >= GFX7;
+                       features->scalarBlockLayout = pdevice->rad_info.chip_class >= GFX7;
+                       features->imagelessFramebuffer = true;
+                       features->uniformBufferStandardLayout = true;
+                       features->shaderSubgroupExtendedTypes = true;
+                       features->separateDepthStencilLayouts = true;
+                       features->hostQueryReset = true;
+                       features->timelineSemaphore = pdevice->rad_info.has_syncobj_wait_for_submit;
+                       features->bufferDeviceAddress = true;
+                       features->bufferDeviceAddressCaptureReplay = false;
+                       features->bufferDeviceAddressMultiDevice = false;
+                       features->vulkanMemoryModel = false;
+                       features->vulkanMemoryModelDeviceScope = false;
+                       features->vulkanMemoryModelAvailabilityVisibilityChains = false;
+                       features->shaderOutputViewportIndex = true;
+                       features->shaderOutputLayer = true;
+                       features->subgroupBroadcastDynamicId = true;
+                       break;
+               }
                default:
                        break;
                }
@@ -1587,8 +1634,8 @@ void radv_GetPhysicalDeviceProperties2(
 
                        driver_props->conformanceVersion = (VkConformanceVersion) {
                                .major = 1,
-                               .minor = 1,
-                               .subminor = 2,
+                               .minor = 2,
+                               .subminor = 0,
                                .patch = 0,
                        };
                        break;
@@ -1759,6 +1806,102 @@ void radv_GetPhysicalDeviceProperties2(
                        props->maxMemoryAllocationSize = RADV_MAX_MEMORY_ALLOCATION_SIZE;
                        break;
                }
+               case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES: {
+                       VkPhysicalDeviceVulkan12Properties *props =
+                               (VkPhysicalDeviceVulkan12Properties *)ext;
+
+                       {
+                               props->driverID = VK_DRIVER_ID_MESA_RADV;
+                               snprintf(props->driverName, VK_MAX_DRIVER_NAME_SIZE, "radv");
+                               snprintf(props->driverInfo, VK_MAX_DRIVER_INFO_SIZE,
+                                        "Mesa " PACKAGE_VERSION MESA_GIT_SHA1
+                                        " (LLVM " MESA_LLVM_VERSION_STRING ")");
+                               /* Keep in sync with driver_props->conformanceVersion (1.2.0.0). */
+                               props->conformanceVersion = (VkConformanceVersion) {
+                                       .major = 1,
+                                       .minor = 2,
+                                       .subminor = 0,
+                                       .patch = 0,
+                               };
+                       }
+
+                       props->denormBehaviorIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_32_BIT_ONLY_KHR;
+                       props->roundingModeIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_32_BIT_ONLY_KHR;
+
+                       props->shaderDenormFlushToZeroFloat32 = true;
+                       props->shaderDenormPreserveFloat32 = false;
+                       props->shaderRoundingModeRTEFloat32 = true;
+                       props->shaderRoundingModeRTZFloat32 = false;
+                       props->shaderSignedZeroInfNanPreserveFloat32 = true;
+
+                       props->shaderDenormFlushToZeroFloat16 = false;
+                       props->shaderDenormPreserveFloat16 = pdevice->rad_info.chip_class >= GFX8;
+                       props->shaderRoundingModeRTEFloat16 = pdevice->rad_info.chip_class >= GFX8;
+                       props->shaderRoundingModeRTZFloat16 = false;
+                       props->shaderSignedZeroInfNanPreserveFloat16 = pdevice->rad_info.chip_class >= GFX8;
+
+                       props->shaderDenormFlushToZeroFloat64 = false;
+                       props->shaderDenormPreserveFloat64 = pdevice->rad_info.chip_class >= GFX8;
+                       props->shaderRoundingModeRTEFloat64 = pdevice->rad_info.chip_class >= GFX8;
+                       props->shaderRoundingModeRTZFloat64 = false;
+                       props->shaderSignedZeroInfNanPreserveFloat64 = pdevice->rad_info.chip_class >= GFX8;
+
+                       props->maxUpdateAfterBindDescriptorsInAllPools = UINT32_MAX / 64;
+                       props->shaderUniformBufferArrayNonUniformIndexingNative = false;
+                       props->shaderSampledImageArrayNonUniformIndexingNative = false;
+                       props->shaderStorageBufferArrayNonUniformIndexingNative = false;
+                       props->shaderStorageImageArrayNonUniformIndexingNative = false;
+                       props->shaderInputAttachmentArrayNonUniformIndexingNative = false;
+                       props->robustBufferAccessUpdateAfterBind = false;
+                       props->quadDivergentImplicitLod = false;
+
+                       size_t max_descriptor_set_size = ((1ull << 31) - 16 * MAX_DYNAMIC_BUFFERS -
+                               MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_INLINE_UNIFORM_BLOCK_COUNT) /
+                                 (32 /* uniform buffer, 32 due to potential space wasted on alignment */ +
+                                  32 /* storage buffer, 32 due to potential space wasted on alignment */ +
+                                  32 /* sampler, largest when combined with image */ +
+                                  64 /* sampled image */ +
+                                  64 /* storage image */);
+                       props->maxPerStageDescriptorUpdateAfterBindSamplers = max_descriptor_set_size;
+                       props->maxPerStageDescriptorUpdateAfterBindUniformBuffers = max_descriptor_set_size;
+                       props->maxPerStageDescriptorUpdateAfterBindStorageBuffers = max_descriptor_set_size;
+                       props->maxPerStageDescriptorUpdateAfterBindSampledImages = max_descriptor_set_size;
+                       props->maxPerStageDescriptorUpdateAfterBindStorageImages = max_descriptor_set_size;
+                       props->maxPerStageDescriptorUpdateAfterBindInputAttachments = max_descriptor_set_size;
+                       props->maxPerStageUpdateAfterBindResources = max_descriptor_set_size;
+                       props->maxDescriptorSetUpdateAfterBindSamplers = max_descriptor_set_size;
+                       props->maxDescriptorSetUpdateAfterBindUniformBuffers = max_descriptor_set_size;
+                       props->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS;
+                       props->maxDescriptorSetUpdateAfterBindStorageBuffers = max_descriptor_set_size;
+                       props->maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS;
+                       props->maxDescriptorSetUpdateAfterBindSampledImages = max_descriptor_set_size;
+                       props->maxDescriptorSetUpdateAfterBindStorageImages = max_descriptor_set_size;
+                       props->maxDescriptorSetUpdateAfterBindInputAttachments = max_descriptor_set_size;
+
+                       /* We support all of the depth resolve modes */
+                       props->supportedDepthResolveModes =
+                               VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR |
+                               VK_RESOLVE_MODE_AVERAGE_BIT_KHR |
+                               VK_RESOLVE_MODE_MIN_BIT_KHR |
+                               VK_RESOLVE_MODE_MAX_BIT_KHR;
+
+                       /* Average doesn't make sense for stencil so we don't support that */
+                       props->supportedStencilResolveModes =
+                               VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR |
+                               VK_RESOLVE_MODE_MIN_BIT_KHR |
+                               VK_RESOLVE_MODE_MAX_BIT_KHR;
+
+                       props->independentResolveNone = true;
+                       props->independentResolve = true;
+
+                       props->filterMinmaxImageComponentMapping = pdevice->rad_info.chip_class >= GFX9;
+                       props->filterMinmaxSingleComponentFormats = true;
+
+                       props->maxTimelineSemaphoreValueDifference = UINT64_MAX;
+
+                       props->framebufferIntegerColorSampleCounts = VK_SAMPLE_COUNT_1_BIT;
+                       break;
+               }
                default:
                        break;
                }
@@ -3492,6 +3635,7 @@ radv_get_preamble_cs(struct radv_queue *queue,
                     uint32_t gsvs_ring_size,
                     bool needs_tess_rings,
                     bool needs_gds,
+                    bool needs_gds_oa,
                     bool needs_sample_positions,
                     struct radeon_cmdbuf **initial_full_flush_preamble_cs,
                      struct radeon_cmdbuf **initial_preamble_cs,
@@ -3506,7 +3650,7 @@ radv_get_preamble_cs(struct radv_queue *queue,
        struct radeon_winsys_bo *gds_bo = NULL;
        struct radeon_winsys_bo *gds_oa_bo = NULL;
        struct radeon_cmdbuf *dest_cs[3] = {0};
-       bool add_tess_rings = false, add_gds = false, add_sample_positions = false;
+       bool add_tess_rings = false, add_gds = false, add_gds_oa = false, add_sample_positions = false;
        unsigned tess_factor_ring_size = 0, tess_offchip_ring_size = 0;
        unsigned max_offchip_buffers;
        unsigned hs_offchip_param = 0;
@@ -3520,6 +3664,10 @@ radv_get_preamble_cs(struct radv_queue *queue,
                if (needs_gds)
                        add_gds = true;
        }
+       if (!queue->has_gds_oa) {
+               if (needs_gds_oa)
+                       add_gds_oa = true;
+       }
        if (!queue->has_sample_positions) {
                if (needs_sample_positions)
                        add_sample_positions = true;
@@ -3549,14 +3697,14 @@ radv_get_preamble_cs(struct radv_queue *queue,
            compute_scratch_waves <= queue->compute_scratch_waves &&
            esgs_ring_size <= queue->esgs_ring_size &&
            gsvs_ring_size <= queue->gsvs_ring_size &&
-           !add_tess_rings && !add_gds && !add_sample_positions &&
+           !add_tess_rings && !add_gds && !add_gds_oa && !add_sample_positions &&
            queue->initial_preamble_cs) {
                *initial_full_flush_preamble_cs = queue->initial_full_flush_preamble_cs;
                *initial_preamble_cs = queue->initial_preamble_cs;
                *continue_preamble_cs = queue->continue_preamble_cs;
                if (!scratch_size_per_wave && !compute_scratch_size_per_wave &&
                    !esgs_ring_size && !gsvs_ring_size && !needs_tess_rings &&
-                   !needs_gds && !needs_sample_positions)
+                   !needs_gds && !needs_gds_oa && !needs_sample_positions)
                        *continue_preamble_cs = NULL;
                return VK_SUCCESS;
        }
@@ -3644,6 +3792,12 @@ radv_get_preamble_cs(struct radv_queue *queue,
                                                          RADV_BO_PRIORITY_SCRATCH);
                if (!gds_bo)
                        goto fail;
+       } else {
+               gds_bo = queue->gds_bo;
+       }
+
+       if (add_gds_oa) {
+               assert(queue->device->physical_device->rad_info.chip_class >= GFX10);
 
                gds_oa_bo = queue->device->ws->buffer_create(queue->device->ws,
                                                             4, 1,
@@ -3653,7 +3807,6 @@ radv_get_preamble_cs(struct radv_queue *queue,
                if (!gds_oa_bo)
                        goto fail;
        } else {
-               gds_bo = queue->gds_bo;
                gds_oa_bo = queue->gds_oa_bo;
        }
 
@@ -3837,8 +3990,10 @@ radv_get_preamble_cs(struct radv_queue *queue,
                queue->has_gds = true;
        }
 
-       if (gds_oa_bo != queue->gds_oa_bo)
+       if (gds_oa_bo != queue->gds_oa_bo) {
                queue->gds_oa_bo = gds_oa_bo;
+               queue->has_gds_oa = true;
+       }
 
        if (descriptor_bo != queue->descriptor_bo) {
                if (queue->descriptor_bo)
@@ -4109,6 +4264,7 @@ radv_get_preambles(struct radv_queue *queue,
        uint32_t esgs_ring_size = 0, gsvs_ring_size = 0;
        bool tess_rings_needed = false;
        bool gds_needed = false;
+       bool gds_oa_needed = false;
        bool sample_positions_needed = false;
 
        for (uint32_t j = 0; j < cmd_buffer_count; j++) {
@@ -4125,13 +4281,14 @@ radv_get_preambles(struct radv_queue *queue,
                gsvs_ring_size = MAX2(gsvs_ring_size, cmd_buffer->gsvs_ring_size_needed);
                tess_rings_needed |= cmd_buffer->tess_rings_needed;
                gds_needed |= cmd_buffer->gds_needed;
+               gds_oa_needed |= cmd_buffer->gds_oa_needed;
                sample_positions_needed |= cmd_buffer->sample_positions_needed;
        }
 
        return radv_get_preamble_cs(queue, scratch_size_per_wave, waves_wanted,
                                    compute_scratch_size_per_wave, compute_waves_wanted,
                                    esgs_ring_size, gsvs_ring_size, tess_rings_needed,
-                                   gds_needed, sample_positions_needed,
+                                   gds_needed, gds_oa_needed, sample_positions_needed,
                                    initial_full_flush_preamble_cs,
                                    initial_preamble_cs, continue_preamble_cs);
 }
@@ -4915,7 +5072,6 @@ static VkResult radv_alloc_memory(struct radv_device *device,
 
 fail:
        radv_free_memory(device, pAllocator,mem);
-       vk_free2(&device->alloc, pAllocator, mem);
 
        return result;
 }