radv: use Mesa's u_atomic.h header
[mesa.git] / src / amd / vulkan / radv_pipeline.c
index aada4d2f30d71f4aad114cdf868f7f8ae1f67f1d..efe641dc0be7f3177ffab65046bffbaab3e31445 100644 (file)
@@ -26,6 +26,7 @@
  */
 
 #include "util/mesa-sha1.h"
+#include "util/u_atomic.h"
 #include "radv_private.h"
 #include "nir/nir.h"
 #include "nir/nir_builder.h"
 #include <llvm-c/TargetMachine.h>
 
 #include "sid.h"
+#include "gfx9d.h"
 #include "r600d_common.h"
 #include "ac_binary.h"
 #include "ac_llvm_util.h"
 #include "ac_nir_to_llvm.h"
 #include "vk_format.h"
 #include "util/debug.h"
+#include "ac_exp_param.h"
 
 void radv_shader_variant_destroy(struct radv_device *device,
                                  struct radv_shader_variant *variant);
@@ -51,6 +54,7 @@ static const struct nir_shader_compiler_options nir_options = {
        .lower_flrp32 = true,
        .lower_fsat = true,
        .lower_fdiv = true,
+       .lower_sub = true,
        .lower_pack_snorm_2x16 = true,
        .lower_pack_snorm_4x8 = true,
        .lower_pack_unorm_2x16 = true,
@@ -61,6 +65,7 @@ static const struct nir_shader_compiler_options nir_options = {
        .lower_unpack_unorm_4x8 = true,
        .lower_extract_byte = true,
        .lower_extract_word = true,
+       .max_unroll_iterations = 32
 };
 
 VkResult radv_CreateShaderModule(
@@ -152,6 +157,12 @@ radv_optimize_nir(struct nir_shader *shader)
                 NIR_PASS(progress, shader, nir_copy_prop);
                 NIR_PASS(progress, shader, nir_opt_remove_phis);
                 NIR_PASS(progress, shader, nir_opt_dce);
+                if (nir_opt_trivial_continues(shader)) {
+                        progress = true;
+                        NIR_PASS(progress, shader, nir_copy_prop);
+                        NIR_PASS(progress, shader, nir_opt_dce);
+                }
+                NIR_PASS(progress, shader, nir_opt_if);
                 NIR_PASS(progress, shader, nir_opt_dead_cf);
                 NIR_PASS(progress, shader, nir_opt_cse);
                 NIR_PASS(progress, shader, nir_opt_peephole_select, 8);
@@ -159,6 +170,9 @@ radv_optimize_nir(struct nir_shader *shader)
                 NIR_PASS(progress, shader, nir_opt_constant_folding);
                 NIR_PASS(progress, shader, nir_opt_undef);
                 NIR_PASS(progress, shader, nir_opt_conditional_discard);
+                if (shader->options->max_unroll_iterations) {
+                        NIR_PASS(progress, shader, nir_opt_loop_unroll, 0);
+                }
         } while (progress);
 }
 
@@ -252,7 +266,7 @@ radv_shader_compile_to_nir(struct radv_device *device,
        }
 
        /* Vulkan uses the separate-shader linking model */
-       nir->info->separate_shader = true;
+       nir->info.separate_shader = true;
 
        nir_shader_gather_info(nir, entry_point->impl);
 
@@ -361,7 +375,7 @@ static void radv_dump_pipeline_stats(struct radv_device *device, struct radv_pip
 void radv_shader_variant_destroy(struct radv_device *device,
                                  struct radv_shader_variant *variant)
 {
-       if (__sync_fetch_and_sub(&variant->ref_count, 1) != 1)
+       if (!p_atomic_dec_zero(&variant->ref_count))
                return;
 
        device->ws->buffer_destroy(variant->bo);
@@ -527,8 +541,8 @@ radv_pipeline_compile(struct radv_pipeline *pipeline,
        bool dump = (pipeline->device->debug_flags & RADV_DEBUG_DUMP_SHADERS);
 
        if (module->nir)
-               _mesa_sha1_compute(module->nir->info->name,
-                                  strlen(module->nir->info->name),
+               _mesa_sha1_compute(module->nir->info.name,
+                                  strlen(module->nir->info.name),
                                   module->sha1);
 
        radv_hash_shader(sha1, module, entrypoint, spec_info, layout, key, 0);
@@ -592,11 +606,14 @@ radv_pipeline_compile(struct radv_pipeline *pipeline,
 }
 
 static union ac_shader_variant_key
-radv_compute_tes_key(bool as_es)
+radv_compute_tes_key(bool as_es, bool export_prim_id)
 {
        union ac_shader_variant_key key;
        memset(&key, 0, sizeof(key));
        key.tes.as_es = as_es;
+       /* Request the primitive-ID export only when the TES is not an ES, i.e. no geometry shader follows; otherwise the field stays 0 from the memset. */
+       if (!as_es)
+               key.tes.export_prim_id = export_prim_id;
        return key;
 }
 
@@ -627,13 +644,15 @@ radv_tess_pipeline_compile(struct radv_pipeline *pipeline,
        nir_shader *tes_nir, *tcs_nir;
        void *tes_code = NULL, *tcs_code = NULL;
        unsigned tes_code_size = 0, tcs_code_size = 0;
-       union ac_shader_variant_key tes_key = radv_compute_tes_key(radv_pipeline_has_gs(pipeline));
+       union ac_shader_variant_key tes_key;
        union ac_shader_variant_key tcs_key;
        bool dump = (pipeline->device->debug_flags & RADV_DEBUG_DUMP_SHADERS);
 
+       tes_key = radv_compute_tes_key(radv_pipeline_has_gs(pipeline),
+                                      pipeline->shaders[MESA_SHADER_FRAGMENT]->info.fs.prim_id_input);
        if (tes_module->nir)
-               _mesa_sha1_compute(tes_module->nir->info->name,
-                                  strlen(tes_module->nir->info->name),
+               _mesa_sha1_compute(tes_module->nir->info.name,
+                                  strlen(tes_module->nir->info.name),
                                   tes_module->sha1);
        radv_hash_shader(tes_sha1, tes_module, tes_entrypoint, tes_spec_info, layout, &tes_key, 0);
 
@@ -645,8 +664,8 @@ radv_tess_pipeline_compile(struct radv_pipeline *pipeline,
                tcs_key = radv_compute_tcs_key(tes_variant->info.tes.primitive_mode, input_vertices);
 
                if (tcs_module->nir)
-                       _mesa_sha1_compute(tcs_module->nir->info->name,
-                                          strlen(tcs_module->nir->info->name),
+                       _mesa_sha1_compute(tcs_module->nir->info.name,
+                                          strlen(tcs_module->nir->info.name),
                                           tcs_module->sha1);
 
                radv_hash_shader(tcs_sha1, tcs_module, tcs_entrypoint, tcs_spec_info, layout, &tcs_key, 0);
@@ -675,16 +694,16 @@ radv_tess_pipeline_compile(struct radv_pipeline *pipeline,
                return;
 
        nir_lower_tes_patch_vertices(tes_nir,
-                                    tcs_nir->info->tess.tcs_vertices_out);
+                                    tcs_nir->info.tess.tcs_vertices_out);
 
        tes_variant = radv_shader_variant_create(pipeline->device, tes_nir,
                                                 layout, &tes_key, &tes_code,
                                                 &tes_code_size, dump);
 
-       tcs_key = radv_compute_tcs_key(tes_nir->info->tess.primitive_mode, input_vertices);
+       tcs_key = radv_compute_tcs_key(tes_nir->info.tess.primitive_mode, input_vertices);
        if (tcs_module->nir)
-               _mesa_sha1_compute(tcs_module->nir->info->name,
-                                  strlen(tcs_module->nir->info->name),
+               _mesa_sha1_compute(tcs_module->nir->info.name,
+                                  strlen(tcs_module->nir->info.name),
                                   tcs_module->sha1);
 
        radv_hash_shader(tcs_sha1, tcs_module, tcs_entrypoint, tcs_spec_info, layout, &tcs_key, 0);
@@ -1319,11 +1338,12 @@ radv_pipeline_init_multisample_state(struct radv_pipeline *pipeline,
                S_028A4C_MULTI_SHADER_ENGINE_PRIM_DISCARD_ENABLE(1) |
                EG_S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) |
                EG_S_028A4C_FORCE_EOV_REZ_ENABLE(1);
+       ms->pa_sc_mode_cntl_0 = S_028A48_ALTERNATE_RBS_PER_TILE(pipeline->device->physical_device->rad_info.chip_class >= GFX9);
 
        if (ms->num_samples > 1) {
                unsigned log_samples = util_logbase2(ms->num_samples);
                unsigned log_ps_iter_samples = util_logbase2(util_next_power_of_two(ps_iter_samples));
-               ms->pa_sc_mode_cntl_0 = S_028A48_MSAA_ENABLE(1);
+               ms->pa_sc_mode_cntl_0 |= S_028A48_MSAA_ENABLE(1);
                ms->pa_sc_line_cntl |= S_028BDC_EXPAND_LINE_WIDTH(1); /* CM_R_028BDC_PA_SC_LINE_CNTL */
                ms->db_eqaa |= S_028804_MAX_ANCHOR_SAMPLES(log_samples) |
                        S_028804_PS_ITER_SAMPLES(log_ps_iter_samples) |
@@ -1592,7 +1612,7 @@ radv_pipeline_init_dynamic_state(struct radv_pipeline *pipeline,
 }
 
 static union ac_shader_variant_key
-radv_compute_vs_key(const VkGraphicsPipelineCreateInfo *pCreateInfo, bool as_es, bool as_ls)
+radv_compute_vs_key(const VkGraphicsPipelineCreateInfo *pCreateInfo, bool as_es, bool as_ls, bool export_prim_id)
 {
        union ac_shader_variant_key key;
        const VkPipelineVertexInputStateCreateInfo *input_state =
@@ -1602,6 +1622,7 @@ radv_compute_vs_key(const VkGraphicsPipelineCreateInfo *pCreateInfo, bool as_es,
        key.vs.instance_rate_inputs = 0;
        key.vs.as_es = as_es;
        key.vs.as_ls = as_ls;
+       key.vs.export_prim_id = export_prim_id;
 
        for (unsigned i = 0; i < input_state->vertexAttributeDescriptionCount; ++i) {
                unsigned binding;
@@ -1843,6 +1864,24 @@ static uint32_t si_vgt_gs_mode(struct radv_shader_variant *gs)
               S_028A40_GS_WRITE_OPTIMIZE(1);
 }
 
+static void calculate_vgt_gs_mode(struct radv_pipeline *pipeline)
+{ /* Sets pipeline->graphics.vgt_gs_mode and pipeline->graphics.vgt_primitiveid_en. */
+       struct radv_shader_variant *vs;
+       vs = radv_pipeline_has_gs(pipeline) ? pipeline->gs_copy_shader : (radv_pipeline_has_tess(pipeline) ? pipeline->shaders[MESA_SHADER_TESS_EVAL] :  pipeline->shaders[MESA_SHADER_VERTEX]);
+       /* vs is the GS copy shader when a GS is present, else the TES when tessellation is used, else the VS. */
+       struct ac_vs_output_info *outinfo = &vs->info.vs.outinfo;
+       /* Start from the defaults; the branches below override them. */
+       pipeline->graphics.vgt_primitiveid_en = false;
+       pipeline->graphics.vgt_gs_mode = 0;
+       /* A GS takes its mode from si_vgt_gs_mode(); without one, scenario A is used only if the selected shader exports the primitive ID. */
+       if (radv_pipeline_has_gs(pipeline)) {
+               pipeline->graphics.vgt_gs_mode = si_vgt_gs_mode(pipeline->shaders[MESA_SHADER_GEOMETRY]);
+       } else if (outinfo->export_prim_id) {
+               pipeline->graphics.vgt_gs_mode = S_028A40_MODE(V_028A40_GS_SCENARIO_A);
+               pipeline->graphics.vgt_primitiveid_en = true;
+       }
+}
+
 static void calculate_pa_cl_vs_out_cntl(struct radv_pipeline *pipeline)
 {
        struct radv_shader_variant *vs;
@@ -1870,6 +1909,25 @@ static void calculate_pa_cl_vs_out_cntl(struct radv_pipeline *pipeline)
                clip_dist_mask;
 
 }
+/* Translate an AC_EXP_PARAM_* offset into a ps_input_cntl value; flat_shade is applied only for offsets up to AC_EXP_PARAM_OFFSET_31. */
+static uint32_t offset_to_ps_input(uint32_t offset, bool flat_shade)
+{
+       uint32_t ps_input_cntl;
+       if (offset <= AC_EXP_PARAM_OFFSET_31) {
+               ps_input_cntl = S_028644_OFFSET(offset);
+               if (flat_shade)
+                       ps_input_cntl |= S_028644_FLAT_SHADE(1);
+       } else {
+               /* The input is a DEFAULT_VAL constant: encode it as OFFSET(0x20) plus its index relative to DEFAULT_VAL_0000; flat_shade is ignored here. */
+               assert(offset >= AC_EXP_PARAM_DEFAULT_VAL_0000 &&
+                      offset <= AC_EXP_PARAM_DEFAULT_VAL_1111);
+               offset -= AC_EXP_PARAM_DEFAULT_VAL_0000;
+               ps_input_cntl = S_028644_OFFSET(0x20) |
+                       S_028644_DEFAULT_VAL(offset);
+       }
+       return ps_input_cntl;
+}
+
 static void calculate_ps_inputs(struct radv_pipeline *pipeline)
 {
        struct radv_shader_variant *ps, *vs;
@@ -1882,24 +1940,20 @@ static void calculate_ps_inputs(struct radv_pipeline *pipeline)
 
        unsigned ps_offset = 0;
 
-       if (ps->info.fs.prim_id_input && (outinfo->prim_id_output != 0xffffffff)) {
-               unsigned vs_offset, flat_shade;
-               unsigned val;
-               vs_offset = outinfo->prim_id_output;
-               flat_shade = true;
-               val = S_028644_OFFSET(vs_offset) | S_028644_FLAT_SHADE(flat_shade);
-               pipeline->graphics.ps_input_cntl[ps_offset] = val;
-               ++ps_offset;
+       if (ps->info.fs.prim_id_input) {
+               unsigned vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID];
+               if (vs_offset != AC_EXP_PARAM_UNDEFINED) {
+                       pipeline->graphics.ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true);
+                       ++ps_offset;
+               }
        }
 
-       if (ps->info.fs.layer_input && (outinfo->layer_output != 0xffffffff)) {
-               unsigned vs_offset, flat_shade;
-               unsigned val;
-               vs_offset = outinfo->layer_output;
-               flat_shade = true;
-               val = S_028644_OFFSET(vs_offset) | S_028644_FLAT_SHADE(flat_shade);
-               pipeline->graphics.ps_input_cntl[ps_offset] = val;
-               ++ps_offset;
+       if (ps->info.fs.layer_input) {
+               unsigned vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_LAYER];
+               if (vs_offset != AC_EXP_PARAM_UNDEFINED) {
+                       pipeline->graphics.ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true);
+                       ++ps_offset;
+               }
        }
 
        if (ps->info.fs.has_pcoord) {
@@ -1910,31 +1964,21 @@ static void calculate_ps_inputs(struct radv_pipeline *pipeline)
        }
 
        for (unsigned i = 0; i < 32 && (1u << i) <= ps->info.fs.input_mask; ++i) {
-               unsigned vs_offset, flat_shade;
-               unsigned val;
-
+               unsigned vs_offset;
+               bool flat_shade;
                if (!(ps->info.fs.input_mask & (1u << i)))
                        continue;
 
-               if (!(outinfo->export_mask & (1u << i))) {
+               vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_VAR0 + i];
+               if (vs_offset == AC_EXP_PARAM_UNDEFINED) {
                        pipeline->graphics.ps_input_cntl[ps_offset] = S_028644_OFFSET(0x20);
                        ++ps_offset;
                        continue;
                }
 
-               vs_offset = util_bitcount(outinfo->export_mask & ((1u << i) - 1));
-               if (outinfo->prim_id_output != 0xffffffff) {
-                       if (vs_offset >= outinfo->prim_id_output)
-                               vs_offset++;
-               }
-               if (outinfo->layer_output != 0xffffffff) {
-                       if (vs_offset >= outinfo->layer_output)
-                         vs_offset++;
-               }
                flat_shade = !!(ps->info.fs.flat_shaded_mask & (1u << ps_offset));
 
-               val = S_028644_OFFSET(vs_offset) | S_028644_FLAT_SHADE(flat_shade);
-               pipeline->graphics.ps_input_cntl[ps_offset] = val;
+               pipeline->graphics.ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, flat_shade);
                ++ps_offset;
        }
 
@@ -1969,14 +2013,44 @@ radv_pipeline_init(struct radv_pipeline *pipeline,
 
        radv_pipeline_init_blend_state(pipeline, pCreateInfo, extra);
 
+       if (!modules[MESA_SHADER_FRAGMENT]) {
+               nir_builder fs_b;
+               nir_builder_init_simple_shader(&fs_b, NULL, MESA_SHADER_FRAGMENT, NULL);
+               fs_b.shader->info.name = ralloc_strdup(fs_b.shader, "noop_fs");
+               fs_m.nir = fs_b.shader;
+               modules[MESA_SHADER_FRAGMENT] = &fs_m;
+       }
+
+       if (modules[MESA_SHADER_FRAGMENT]) {
+               union ac_shader_variant_key key;
+               key.fs.col_format = pipeline->graphics.blend.spi_shader_col_format;
+               key.fs.is_int8 = radv_pipeline_compute_is_int8(pCreateInfo);
+
+               const VkPipelineShaderStageCreateInfo *stage = pStages[MESA_SHADER_FRAGMENT];
+
+               pipeline->shaders[MESA_SHADER_FRAGMENT] =
+                        radv_pipeline_compile(pipeline, cache, modules[MESA_SHADER_FRAGMENT],
+                                              stage ? stage->pName : "main",
+                                              MESA_SHADER_FRAGMENT,
+                                              stage ? stage->pSpecializationInfo : NULL,
+                                              pipeline->layout, &key);
+               pipeline->active_stages |= mesa_to_vk_shader_stage(MESA_SHADER_FRAGMENT);
+       }
+
+       if (fs_m.nir)
+               ralloc_free(fs_m.nir);
+
        if (modules[MESA_SHADER_VERTEX]) {
                bool as_es = false;
                bool as_ls = false;
+               bool export_prim_id = false;
                if (modules[MESA_SHADER_TESS_CTRL])
                        as_ls = true;
                else if (modules[MESA_SHADER_GEOMETRY])
                        as_es = true;
-               union ac_shader_variant_key key = radv_compute_vs_key(pCreateInfo, as_es, as_ls);
+               else if (pipeline->shaders[MESA_SHADER_FRAGMENT]->info.fs.prim_id_input)
+                       export_prim_id = true;
+               union ac_shader_variant_key key = radv_compute_vs_key(pCreateInfo, as_es, as_ls, export_prim_id);
 
                pipeline->shaders[MESA_SHADER_VERTEX] =
                         radv_pipeline_compile(pipeline, cache, modules[MESA_SHADER_VERTEX],
@@ -1989,7 +2063,7 @@ radv_pipeline_init(struct radv_pipeline *pipeline,
        }
 
        if (modules[MESA_SHADER_GEOMETRY]) {
-               union ac_shader_variant_key key = radv_compute_vs_key(pCreateInfo, false, false);
+               union ac_shader_variant_key key = radv_compute_vs_key(pCreateInfo, false, false, false);
 
                pipeline->shaders[MESA_SHADER_GEOMETRY] =
                         radv_pipeline_compile(pipeline, cache, modules[MESA_SHADER_GEOMETRY],
@@ -1999,10 +2073,7 @@ radv_pipeline_init(struct radv_pipeline *pipeline,
                                               pipeline->layout, &key);
 
                pipeline->active_stages |= mesa_to_vk_shader_stage(MESA_SHADER_GEOMETRY);
-
-               pipeline->graphics.vgt_gs_mode = si_vgt_gs_mode(pipeline->shaders[MESA_SHADER_GEOMETRY]);
-       } else
-               pipeline->graphics.vgt_gs_mode = 0;
+       }
 
        if (modules[MESA_SHADER_TESS_EVAL]) {
                assert(modules[MESA_SHADER_TESS_CTRL]);
@@ -2021,33 +2092,6 @@ radv_pipeline_init(struct radv_pipeline *pipeline,
                        mesa_to_vk_shader_stage(MESA_SHADER_TESS_CTRL);
        }
 
-       if (!modules[MESA_SHADER_FRAGMENT]) {
-               nir_builder fs_b;
-               nir_builder_init_simple_shader(&fs_b, NULL, MESA_SHADER_FRAGMENT, NULL);
-               fs_b.shader->info->name = ralloc_strdup(fs_b.shader, "noop_fs");
-               fs_m.nir = fs_b.shader;
-               modules[MESA_SHADER_FRAGMENT] = &fs_m;
-       }
-
-       if (modules[MESA_SHADER_FRAGMENT]) {
-               union ac_shader_variant_key key;
-               key.fs.col_format = pipeline->graphics.blend.spi_shader_col_format;
-               key.fs.is_int8 = radv_pipeline_compute_is_int8(pCreateInfo);
-
-               const VkPipelineShaderStageCreateInfo *stage = pStages[MESA_SHADER_FRAGMENT];
-
-               pipeline->shaders[MESA_SHADER_FRAGMENT] =
-                        radv_pipeline_compile(pipeline, cache, modules[MESA_SHADER_FRAGMENT],
-                                              stage ? stage->pName : "main",
-                                              MESA_SHADER_FRAGMENT,
-                                              stage ? stage->pSpecializationInfo : NULL,
-                                              pipeline->layout, &key);
-               pipeline->active_stages |= mesa_to_vk_shader_stage(MESA_SHADER_FRAGMENT);
-       }
-
-       if (fs_m.nir)
-               ralloc_free(fs_m.nir);
-
        radv_pipeline_init_depth_stencil_state(pipeline, pCreateInfo, extra);
        radv_pipeline_init_raster_state(pipeline, pCreateInfo);
        radv_pipeline_init_multisample_state(pipeline, pCreateInfo);
@@ -2111,6 +2155,7 @@ radv_pipeline_init(struct radv_pipeline *pipeline,
                ps->info.fs.writes_z ? V_028710_SPI_SHADER_32_R :
                V_028710_SPI_SHADER_ZERO;
 
+       calculate_vgt_gs_mode(pipeline);
        calculate_pa_cl_vs_out_cntl(pipeline);
        calculate_ps_inputs(pipeline);
 
@@ -2131,10 +2176,15 @@ radv_pipeline_init(struct radv_pipeline *pipeline,
                                S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER);
                else
                        stages |= S_028B54_VS_EN(V_028B54_VS_STAGE_DS);
+
        } else if (radv_pipeline_has_gs(pipeline))
                stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_REAL) |
                        S_028B54_GS_EN(1) |
                        S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER);
+
+       if (device->physical_device->rad_info.chip_class >= GFX9)
+               stages |= S_028B54_MAX_PRIMGRP_IN_WAVE(2);
+
        pipeline->graphics.vgt_shader_stages_en = stages;
 
        if (radv_pipeline_has_gs(pipeline))
@@ -2182,6 +2232,16 @@ radv_pipeline_init(struct radv_pipeline *pipeline,
                pipeline->binding_stride[desc->binding] = desc->stride;
        }
 
+       struct ac_userdata_info *loc = radv_lookup_user_sgpr(pipeline, MESA_SHADER_VERTEX,
+                                                            AC_UD_VS_BASE_VERTEX_START_INSTANCE);
+       if (loc->sgpr_idx != -1) {
+               pipeline->graphics.vtx_base_sgpr = radv_shader_stage_to_user_data_0(MESA_SHADER_VERTEX, radv_pipeline_has_gs(pipeline), radv_pipeline_has_tess(pipeline));
+               pipeline->graphics.vtx_base_sgpr += loc->sgpr_idx * 4;
+               if (pipeline->shaders[MESA_SHADER_VERTEX]->info.info.vs.needs_draw_id)
+                       pipeline->graphics.vtx_emit_num = 3;
+               else
+                       pipeline->graphics.vtx_emit_num = 2;
+       }
        if (device->debug_flags & RADV_DEBUG_DUMP_SHADER_STATS) {
                radv_dump_pipeline_stats(device, pipeline);
        }