radv: add an assertion in radv_gfx10_compute_bin_size()
[mesa.git] / src / amd / vulkan / radv_pipeline.c
index 5036fa69d2084e269ecb4f0f44f0c6ac5f42c15c..9c83e22fda2c197bda5f2df7b43f0898614cccf8 100644 (file)
@@ -167,6 +167,8 @@ static uint32_t get_hash_flags(struct radv_device *device)
                hash_flags |= RADV_HASH_SHADER_PS_WAVE32;
        if (device->physical_device->ge_wave_size == 32)
                hash_flags |= RADV_HASH_SHADER_GE_WAVE32;
+       if (device->physical_device->use_aco)
+               hash_flags |= RADV_HASH_SHADER_ACO;
        return hash_flags;
 }
 
@@ -2261,6 +2263,9 @@ radv_generate_graphics_pipeline_key(struct radv_pipeline *pipeline,
        if (pipeline->device->physical_device->rad_info.chip_class < GFX8)
                radv_pipeline_compute_get_int_clamp(pCreateInfo, &key.is_int8, &key.is_int10);
 
+       if (pipeline->device->physical_device->rad_info.chip_class >= GFX10)
+               key.topology = pCreateInfo->pInputAssemblyState->topology;
+
        return key;
 }
 
@@ -2290,6 +2295,7 @@ radv_fill_shader_keys(struct radv_device *device,
                keys[MESA_SHADER_VERTEX].vs.vertex_attribute_offsets[i] = key->vertex_attribute_offsets[i];
                keys[MESA_SHADER_VERTEX].vs.vertex_attribute_strides[i] = key->vertex_attribute_strides[i];
        }
+       keys[MESA_SHADER_VERTEX].vs.outprim = si_conv_prim_to_gs_out(key->topology);
 
        if (nir[MESA_SHADER_TESS_CTRL]) {
                keys[MESA_SHADER_VERTEX].vs_common_out.as_ls = true;
@@ -2307,9 +2313,7 @@ radv_fill_shader_keys(struct radv_device *device,
                        keys[MESA_SHADER_VERTEX].vs_common_out.as_es = true;
        }
 
-       if (device->physical_device->rad_info.chip_class >= GFX10 &&
-           device->physical_device->rad_info.family != CHIP_NAVI14 &&
-           !(device->instance->debug_flags & RADV_DEBUG_NO_NGG)) {
+       if (device->physical_device->use_ngg) {
                if (nir[MESA_SHADER_TESS_CTRL]) {
                        keys[MESA_SHADER_TESS_EVAL].vs_common_out.as_ngg = true;
                } else {
@@ -2551,6 +2555,14 @@ void radv_stop_feedback(VkPipelineCreationFeedbackEXT *feedback, bool cache_hit)
                           (cache_hit ? VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT : 0);
 }
 
+static
+bool radv_aco_supported_stage(gl_shader_stage stage, bool has_gs, bool has_ts) /* Tells whether `stage` is eligible for ACO given the pipeline's GS/tessellation usage; callers in this patch AND the result with use_aco. */
+{
+       return (stage == MESA_SHADER_VERTEX && !has_gs && !has_ts) || /* vertex: only when the pipeline has neither a geometry shader nor tessellation */
+              stage == MESA_SHADER_FRAGMENT || /* fragment: accepted regardless of has_gs/has_ts */
+              stage == MESA_SHADER_COMPUTE; /* compute: accepted regardless of has_gs/has_ts; any other stage yields false */
+}
+
 static
 void radv_create_shaders(struct radv_pipeline *pipeline,
                          struct radv_device *device,
@@ -2613,6 +2625,10 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
                modules[MESA_SHADER_FRAGMENT] = &fs_m;
        }
 
+       bool has_gs = modules[MESA_SHADER_GEOMETRY];
+       bool has_ts = modules[MESA_SHADER_TESS_CTRL] || modules[MESA_SHADER_TESS_EVAL];
+       bool use_aco = device->physical_device->use_aco;
+
        for (unsigned i = 0; i < MESA_SHADER_STAGES; ++i) {
                const VkPipelineShaderStageCreateInfo *stage = pStages[i];
 
@@ -2621,10 +2637,11 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
 
                radv_start_feedback(stage_feedbacks[i]);
 
+               bool aco = use_aco && radv_aco_supported_stage(i, has_gs, has_ts);
                nir[i] = radv_shader_compile_to_nir(device, modules[i],
                                                    stage ? stage->pName : "main", i,
                                                    stage ? stage->pSpecializationInfo : NULL,
-                                                   flags, pipeline->layout);
+                                                   flags, pipeline->layout, aco);
 
                /* We don't want to alter meta shaders IR directly so clone it
                 * first.
@@ -2651,7 +2668,10 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
                                           nir_lower_non_uniform_ssbo_access |
                                           nir_lower_non_uniform_texture_access |
                                           nir_lower_non_uniform_image_access);
-                       NIR_PASS_V(nir[i], nir_lower_bool_to_int32);
+
+                       bool aco = use_aco && radv_aco_supported_stage(i, has_gs, has_ts);
+                       if (!aco)
+                               NIR_PASS_V(nir[i], nir_lower_bool_to_int32);
                }
 
                if (radv_can_dump_shader(device, modules[i], false))
@@ -2690,11 +2710,13 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
                if (!pipeline->shaders[MESA_SHADER_FRAGMENT]) {
                        radv_start_feedback(stage_feedbacks[MESA_SHADER_FRAGMENT]);
 
+                       bool aco = use_aco && radv_aco_supported_stage(MESA_SHADER_FRAGMENT, has_gs, has_ts);
                        pipeline->shaders[MESA_SHADER_FRAGMENT] =
                               radv_shader_variant_compile(device, modules[MESA_SHADER_FRAGMENT], &nir[MESA_SHADER_FRAGMENT], 1,
                                                          pipeline->layout, keys + MESA_SHADER_FRAGMENT,
                                                          infos + MESA_SHADER_FRAGMENT,
-                                                         keep_executable_info, &binaries[MESA_SHADER_FRAGMENT]);
+                                                         keep_executable_info, aco,
+                                                         &binaries[MESA_SHADER_FRAGMENT]);
 
                        radv_stop_feedback(stage_feedbacks[MESA_SHADER_FRAGMENT], false);
                }
@@ -2725,7 +2747,7 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
                        pipeline->shaders[MESA_SHADER_TESS_CTRL] = radv_shader_variant_compile(device, modules[MESA_SHADER_TESS_CTRL], combined_nir, 2,
                                                                                              pipeline->layout,
                                                                                              &key, &infos[MESA_SHADER_TESS_CTRL], keep_executable_info,
-                                                                                             &binaries[MESA_SHADER_TESS_CTRL]);
+                                                                                             false, &binaries[MESA_SHADER_TESS_CTRL]);
 
                        radv_stop_feedback(stage_feedbacks[MESA_SHADER_TESS_CTRL], false);
                }
@@ -2744,7 +2766,7 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
                        pipeline->shaders[MESA_SHADER_GEOMETRY] = radv_shader_variant_compile(device, modules[MESA_SHADER_GEOMETRY], combined_nir, 2,
                                                                                             pipeline->layout,
                                                                                             &keys[pre_stage], &infos[MESA_SHADER_GEOMETRY], keep_executable_info,
-                                                                                            &binaries[MESA_SHADER_GEOMETRY]);
+                                                                                            false, &binaries[MESA_SHADER_GEOMETRY]);
 
                        radv_stop_feedback(stage_feedbacks[MESA_SHADER_GEOMETRY], false);
                }
@@ -2763,10 +2785,11 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
 
                        radv_start_feedback(stage_feedbacks[i]);
 
+                       bool aco = use_aco && radv_aco_supported_stage(i, has_gs, has_ts);
                        pipeline->shaders[i] = radv_shader_variant_compile(device, modules[i], &nir[i], 1,
                                                                          pipeline->layout,
                                                                          keys + i, infos + i,keep_executable_info,
-                                                                         &binaries[i]);
+                                                                         aco, &binaries[i]);
 
                        radv_stop_feedback(stage_feedbacks[i], false);
                }
@@ -3203,6 +3226,7 @@ radv_gfx10_compute_bin_size(struct radv_pipeline *pipeline, const VkGraphicsPipe
                        color_bytes_per_pixel += vk_format_get_blocksize(format);
 
                        if (total_samples > 1) {
+                               assert(samples_log <= 3);
                                const unsigned fmask_array[] = {0, 1, 1, 4};
                                fmask_bytes_per_pixel += fmask_array[samples_log];
                        }
@@ -3372,6 +3396,7 @@ radv_pipeline_generate_depth_stencil_state(struct radeon_cmdbuf *ctx_cs,
        const VkPipelineDepthStencilStateCreateInfo *vkds = pCreateInfo->pDepthStencilState;
        RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
        struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass;
+       struct radv_shader_variant *ps = pipeline->shaders[MESA_SHADER_FRAGMENT];
        struct radv_render_pass_attachment *attachment = NULL;
        uint32_t db_depth_control = 0, db_stencil_control = 0;
        uint32_t db_render_control = 0, db_render_override2 = 0;
@@ -3420,7 +3445,8 @@ radv_pipeline_generate_depth_stencil_state(struct radeon_cmdbuf *ctx_cs,
        db_render_override |= S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) |
                              S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE);
 
-       if (!pCreateInfo->pRasterizationState->depthClampEnable) {
+       if (!pCreateInfo->pRasterizationState->depthClampEnable &&
+           ps->info.ps.writes_z) {
                /* From VK_EXT_depth_range_unrestricted spec:
                 *
                 * "The behavior described in Primitive Clipping still applies.
@@ -4810,8 +4836,8 @@ radv_compute_generate_pm4(struct radv_pipeline *pipeline)
        unsigned max_waves_per_sh = 0;
        uint64_t va;
 
-       pipeline->cs.buf = malloc(20 * 4);
-       pipeline->cs.max_dw = 20;
+       pipeline->cs.max_dw = device->physical_device->rad_info.chip_class >= GFX10 ? 22 : 20;
+       pipeline->cs.buf = malloc(pipeline->cs.max_dw * 4);
 
        compute_shader = pipeline->shaders[MESA_SHADER_COMPUTE];
        va = radv_buffer_get_va(compute_shader->bo) + compute_shader->bo_offset;
@@ -4823,6 +4849,9 @@ radv_compute_generate_pm4(struct radv_pipeline *pipeline)
        radeon_set_sh_reg_seq(&pipeline->cs, R_00B848_COMPUTE_PGM_RSRC1, 2);
        radeon_emit(&pipeline->cs, compute_shader->config.rsrc1);
        radeon_emit(&pipeline->cs, compute_shader->config.rsrc2);
+       if (device->physical_device->rad_info.chip_class >= GFX10) {
+               radeon_set_sh_reg(&pipeline->cs, R_00B8A0_COMPUTE_PGM_RSRC3, compute_shader->config.rsrc3);
+       }
 
        radeon_set_sh_reg(&pipeline->cs, R_00B860_COMPUTE_TMPRING_SIZE,
                          S_00B860_WAVES(pipeline->max_waves) |
@@ -5220,12 +5249,17 @@ VkResult radv_GetPipelineExecutableInternalRepresentationsKHR(
        }
        ++p;
 
-       /* LLVM IR */
+       /* backend IR */
        if (p < end) {
                p->isText = true;
-               desc_copy(p->name, "LLVM IR");
-               desc_copy(p->description, "The LLVM IR after some optimizations");
-               if (radv_copy_representation(p->pData, &p->dataSize, shader->llvm_ir_string) != VK_SUCCESS)
+               if (shader->aco_used) {
+                       desc_copy(p->name, "ACO IR");
+                       desc_copy(p->description, "The ACO IR after some optimizations");
+               } else {
+                       desc_copy(p->name, "LLVM IR");
+                       desc_copy(p->description, "The LLVM IR after some optimizations");
+               }
+               if (radv_copy_representation(p->pData, &p->dataSize, shader->ir_string) != VK_SUCCESS)
                        result = VK_INCOMPLETE;
        }
        ++p;