radv: Track scratch usage across pipelines & command buffers.
[mesa.git] / src / amd / vulkan / radv_pipeline.c
index 75785ec921d040ff59869fada72ed879f53b4a37..e332877e2ba6b11633a11e44db32281533a8f875 100644 (file)
@@ -104,6 +104,19 @@ void radv_DestroyShaderModule(
        vk_free2(&device->alloc, pAllocator, module);
 }
 
+/* Destroy every shader variant the pipeline holds, then free the pipeline itself. */
+static void
+radv_pipeline_destroy(struct radv_device *device,
+                      struct radv_pipeline *pipeline,
+                      const VkAllocationCallbacks* allocator)
+{
+       for (unsigned stage = 0; stage < MESA_SHADER_STAGES; ++stage) {
+               if (pipeline->shaders[stage] != NULL)
+                       radv_shader_variant_destroy(device, pipeline->shaders[stage]);
+       }
+       vk_free2(&device->alloc, allocator, pipeline);
+}
+
 void radv_DestroyPipeline(
        VkDevice                                    _device,
        VkPipeline                                  _pipeline,
@@ -115,11 +128,7 @@ void radv_DestroyPipeline(
        if (!_pipeline)
                return;
 
-       for (unsigned i = 0; i < MESA_SHADER_STAGES; ++i)
-               if (pipeline->shaders[i])
-                       radv_shader_variant_destroy(device, pipeline->shaders[i]);
-
-       vk_free2(&device->alloc, pAllocator, pipeline);
+       radv_pipeline_destroy(device, pipeline, pAllocator);
 }
 
 
@@ -188,7 +197,10 @@ radv_shader_compile_to_nir(struct radv_device *device,
                                assert(data + entry.size <= spec_info->pData + spec_info->dataSize);
 
                                spec_entries[i].id = spec_info->pMapEntries[i].constantID;
-                               spec_entries[i].data = *(const uint32_t *)data;
+                               if (spec_info->dataSize == 8)
+                                       spec_entries[i].data64 = *(const uint64_t *)data;
+                               else
+                                       spec_entries[i].data32 = *(const uint32_t *)data;
                        }
                }
                const struct nir_spirv_supported_extensions supported_ext = {
@@ -202,11 +214,13 @@ radv_shader_compile_to_nir(struct radv_device *device,
 
                free(spec_entries);
 
-               nir_lower_returns(nir);
-               nir_validate_shader(nir);
-
-               nir_inline_functions(nir);
-               nir_validate_shader(nir);
+               /* We have to lower away local constant initializers right before we
+                * inline functions.  That way they get properly initialized at the top
+                * of the function and not at the top of its caller.
+                */
+               NIR_PASS_V(nir, nir_lower_constant_initializers, nir_var_local);
+               NIR_PASS_V(nir, nir_lower_returns);
+               NIR_PASS_V(nir, nir_inline_functions);
 
                /* Pick off the single entrypoint that we want */
                foreach_list_typed_safe(nir_function, func, node, &nir->functions) {
@@ -216,13 +230,14 @@ radv_shader_compile_to_nir(struct radv_device *device,
                assert(exec_list_length(&nir->functions) == 1);
                entry_point->name = ralloc_strdup(entry_point, "main");
 
-               nir_remove_dead_variables(nir, nir_var_shader_in);
-               nir_remove_dead_variables(nir, nir_var_shader_out);
-               nir_remove_dead_variables(nir, nir_var_system_value);
-               nir_validate_shader(nir);
+               NIR_PASS_V(nir, nir_remove_dead_variables,
+                          nir_var_shader_in | nir_var_shader_out | nir_var_system_value);
 
-               nir_lower_system_values(nir);
-               nir_validate_shader(nir);
+               /* Now that we've deleted all but the main function, we can go ahead and
+                * lower the rest of the constant initializers.
+                */
+               NIR_PASS_V(nir, nir_lower_constant_initializers, ~0);
+               NIR_PASS_V(nir, nir_lower_system_values);
        }
 
        /* Vulkan uses the separate-shader linking model */
@@ -272,7 +287,7 @@ static const char *radv_get_shader_name(struct radv_shader_variant *var,
 }
 static void radv_dump_pipeline_stats(struct radv_device *device, struct radv_pipeline *pipeline)
 {
-       unsigned lds_increment = device->instance->physicalDevice.rad_info.chip_class >= CIK ? 512 : 256;
+       unsigned lds_increment = device->physical_device->rad_info.chip_class >= CIK ? 512 : 256;
        struct radv_shader_variant *var;
        struct ac_shader_config *conf;
        int i;
@@ -293,7 +308,7 @@ static void radv_dump_pipeline_stats(struct radv_device *device, struct radv_pip
                }
 
                if (conf->num_sgprs) {
-                       if (device->instance->physicalDevice.rad_info.chip_class >= VI)
+                       if (device->physical_device->rad_info.chip_class >= VI)
                                max_simd_waves = MIN2(max_simd_waves, 800 / conf->num_sgprs);
                        else
                                max_simd_waves = MIN2(max_simd_waves, 512 / conf->num_sgprs);
@@ -403,7 +418,7 @@ static struct radv_shader_variant *radv_shader_variant_create(struct radv_device
                                                              bool dump)
 {
        struct radv_shader_variant *variant = calloc(1, sizeof(struct radv_shader_variant));
-       enum radeon_family chip_family = device->instance->physicalDevice.rad_info.family;
+       enum radeon_family chip_family = device->physical_device->rad_info.family;
        LLVMTargetMachineRef tm;
        if (!variant)
                return NULL;
@@ -415,10 +430,11 @@ static struct radv_shader_variant *radv_shader_variant_create(struct radv_device
 
        struct ac_shader_binary binary;
 
-       options.unsafe_math = env_var_as_boolean("RADV_UNSAFE_MATH", false);
+       options.unsafe_math = !!(device->debug_flags & RADV_DEBUG_UNSAFE_MATH);
        options.family = chip_family;
-       options.chip_class = device->instance->physicalDevice.rad_info.chip_class;
-       tm = ac_create_target_machine(chip_family);
+       options.chip_class = device->physical_device->rad_info.chip_class;
+       options.supports_spill = false;
+       tm = ac_create_target_machine(chip_family, false);
        ac_compile_nir_shader(tm, &binary, &variant->config,
                              &variant->info, shader, &options, dump);
        LLVMDisposeTargetMachine(tm);
@@ -448,14 +464,14 @@ radv_pipeline_compile(struct radv_pipeline *pipeline,
                      gl_shader_stage stage,
                      const VkSpecializationInfo *spec_info,
                      struct radv_pipeline_layout *layout,
-                     const union ac_shader_variant_key *key,
-                     bool dump)
+                     const union ac_shader_variant_key *key)
 {
        unsigned char sha1[20];
        struct radv_shader_variant *variant;
        nir_shader *nir;
        void *code = NULL;
        unsigned code_size = 0;
+       bool dump = (pipeline->device->debug_flags & RADV_DEBUG_DUMP_SHADERS);
 
        if (module->nir)
                _mesa_sha1_compute(module->nir->info->name,
@@ -492,6 +508,48 @@ radv_pipeline_compile(struct radv_pipeline *pipeline,
        return variant;
 }
 
+/* Derive pipeline->scratch_bytes_per_wave and pipeline->max_waves from the compiled shaders.
+ * Returns VK_ERROR_OUT_OF_DEVICE_MEMORY (pipeline untouched) if scratch is used but max_waves < min_waves. */
+static VkResult
+radv_pipeline_scratch_init(struct radv_device *device, struct radv_pipeline *pipeline)
+{
+       unsigned scratch_bytes_per_wave = 0;
+       unsigned max_waves = 0;
+       unsigned min_waves = 1;
+
+       for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
+               if (pipeline->shaders[i]) {
+                       unsigned max_stage_waves = device->scratch_waves;
+                       scratch_bytes_per_wave = MAX2(scratch_bytes_per_wave,
+                                                     pipeline->shaders[i]->config.scratch_bytes_per_wave);
+                       /* Only apply the VGPR-based cap when num_vgprs != 0: 256 / 0 is undefined. */
+                       if (pipeline->shaders[i]->config.num_vgprs)
+                               max_stage_waves = MIN2(max_stage_waves,
+                                         4 * device->physical_device->rad_info.num_good_compute_units *
+                                         (256 / pipeline->shaders[i]->config.num_vgprs));
+                       max_waves = MAX2(max_waves, max_stage_waves);
+               }
+       }
+
+       if (pipeline->shaders[MESA_SHADER_COMPUTE]) {
+               unsigned group_size = pipeline->shaders[MESA_SHADER_COMPUTE]->info.cs.block_size[0] *
+                                     pipeline->shaders[MESA_SHADER_COMPUTE]->info.cs.block_size[1] *
+                                     pipeline->shaders[MESA_SHADER_COMPUTE]->info.cs.block_size[2];
+               min_waves = MAX2(min_waves, round_up_u32(group_size, 64));
+       }
+       /* Clamp so that max_waves * scratch_bytes_per_wave cannot exceed 0xffffffff. */
+       if (scratch_bytes_per_wave)
+               max_waves = MIN2(max_waves, 0xffffffffu / scratch_bytes_per_wave);
+       if (scratch_bytes_per_wave && max_waves < min_waves) {
+               /* Not really true at this moment, but will be true on first
+                * execution. Avoid having hanging shaders. */
+               return VK_ERROR_OUT_OF_DEVICE_MEMORY;
+       }
+       pipeline->scratch_bytes_per_wave = scratch_bytes_per_wave;
+       pipeline->max_waves = max_waves;
+       return VK_SUCCESS;
+}
+
 static uint32_t si_translate_blend_function(VkBlendOp op)
 {
        switch (op) {
@@ -1028,7 +1086,7 @@ radv_pipeline_init_multisample_state(struct radv_pipeline *pipeline,
        const VkPipelineMultisampleStateCreateInfo *vkms = pCreateInfo->pMultisampleState;
        struct radv_blend_state *blend = &pipeline->graphics.blend;
        struct radv_multisample_state *ms = &pipeline->graphics.ms;
-       unsigned num_tile_pipes = pipeline->device->instance->physicalDevice.rad_info.num_tile_pipes;
+       unsigned num_tile_pipes = pipeline->device->physical_device->rad_info.num_tile_pipes;
        int ps_iter_samples = 1;
        uint32_t mask = 0xffff;
 
@@ -1306,8 +1364,8 @@ radv_pipeline_init(struct radv_pipeline *pipeline,
                   const VkAllocationCallbacks *alloc)
 {
        struct radv_shader_module fs_m = {0};
+       VkResult result;
 
-       bool dump = getenv("RADV_DUMP_SHADERS");
        if (alloc == NULL)
                alloc = &device->alloc;
 
@@ -1334,7 +1392,7 @@ radv_pipeline_init(struct radv_pipeline *pipeline,
                                               pStages[MESA_SHADER_VERTEX]->pName,
                                               MESA_SHADER_VERTEX,
                                               pStages[MESA_SHADER_VERTEX]->pSpecializationInfo,
-                                              pipeline->layout, &key, dump);
+                                              pipeline->layout, &key);
 
                pipeline->active_stages |= mesa_to_vk_shader_stage(MESA_SHADER_VERTEX);
        }
@@ -1359,7 +1417,7 @@ radv_pipeline_init(struct radv_pipeline *pipeline,
                                               stage ? stage->pName : "main",
                                               MESA_SHADER_FRAGMENT,
                                               stage ? stage->pSpecializationInfo : NULL,
-                                              pipeline->layout, &key, dump);
+                                              pipeline->layout, &key);
                pipeline->active_stages |= mesa_to_vk_shader_stage(MESA_SHADER_FRAGMENT);
        }
 
@@ -1411,11 +1469,12 @@ radv_pipeline_init(struct radv_pipeline *pipeline,
                pipeline->binding_stride[desc->binding] = desc->stride;
        }
 
-       if (device->shader_stats_dump) {
+       if (device->debug_flags & RADV_DEBUG_DUMP_SHADER_STATS) {
                radv_dump_pipeline_stats(device, pipeline);
        }
 
-       return VK_SUCCESS;
+       result = radv_pipeline_scratch_init(device, pipeline);
+       return result;
 }
 
 VkResult
@@ -1441,7 +1500,7 @@ radv_graphics_pipeline_create(
        result = radv_pipeline_init(pipeline, device, cache,
                                    pCreateInfo, extra, pAllocator);
        if (result != VK_SUCCESS) {
-               vk_free2(&device->alloc, pAllocator, pipeline);
+               radv_pipeline_destroy(device, pipeline, pAllocator);
                return result;
        }
 
@@ -1487,7 +1546,7 @@ static VkResult radv_compute_pipeline_create(
        RADV_FROM_HANDLE(radv_pipeline_cache, cache, _cache);
        RADV_FROM_HANDLE(radv_shader_module, module, pCreateInfo->stage.module);
        struct radv_pipeline *pipeline;
-       bool dump = getenv("RADV_DUMP_SHADERS");
+       VkResult result;
 
        pipeline = vk_alloc2(&device->alloc, pAllocator, sizeof(*pipeline), 8,
                               VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
@@ -1503,11 +1562,18 @@ static VkResult radv_compute_pipeline_create(
                                       pCreateInfo->stage.pName,
                                       MESA_SHADER_COMPUTE,
                                       pCreateInfo->stage.pSpecializationInfo,
-                                      pipeline->layout, NULL, dump);
+                                      pipeline->layout, NULL);
+
+
+       result = radv_pipeline_scratch_init(device, pipeline);
+       if (result != VK_SUCCESS) {
+               radv_pipeline_destroy(device, pipeline, pAllocator);
+               return result;
+       }
 
        *pPipeline = radv_pipeline_to_handle(pipeline);
 
-       if (device->shader_stats_dump) {
+       if (device->debug_flags & RADV_DEBUG_DUMP_SHADER_STATS) {
                radv_dump_pipeline_stats(device, pipeline);
        }
        return VK_SUCCESS;