radv: Move gs state out of pipeline.
[mesa.git] / src / amd / vulkan / radv_pipeline.c
index 7ad964bdb8d2513edd4d8afb2cb782b53d74e3ec..9212d461e557eab50218f1e72c9ea9a2b32e9810 100644 (file)
@@ -72,6 +72,13 @@ struct radv_tessellation_state {
        uint32_t tf_param;
 };
 
+struct radv_gs_state {
+       uint32_t vgt_gs_onchip_cntl;
+       uint32_t vgt_gs_max_prims_per_subgroup;
+       uint32_t vgt_esgs_ring_itemsize;
+       uint32_t lds_size;
+};
+
 static void
 radv_pipeline_destroy(struct radv_device *device,
                       struct radv_pipeline *pipeline,
@@ -1168,12 +1175,20 @@ radv_pipeline_init_dynamic_state(struct radv_pipeline *pipeline,
        pipeline->dynamic_state.mask = states;
 }
 
-static void calculate_gfx9_gs_info(const VkGraphicsPipelineCreateInfo *pCreateInfo,
-                                   struct radv_pipeline *pipeline)
+static struct radv_gs_state
+calculate_gs_info(const VkGraphicsPipelineCreateInfo *pCreateInfo,
+                       const struct radv_pipeline *pipeline)
 {
+       struct radv_gs_state gs = {0};
        struct ac_shader_variant_info *gs_info = &pipeline->shaders[MESA_SHADER_GEOMETRY]->info;
-       struct ac_es_output_info *es_info = radv_pipeline_has_tess(pipeline) ?
-               &gs_info->tes.es_info : &gs_info->vs.es_info;
+       struct ac_es_output_info *es_info;
+       if (pipeline->device->physical_device->rad_info.chip_class >= GFX9)
+               es_info = radv_pipeline_has_tess(pipeline) ? &gs_info->tes.es_info : &gs_info->vs.es_info;
+       else
+               es_info = radv_pipeline_has_tess(pipeline) ?
+                       &pipeline->shaders[MESA_SHADER_TESS_EVAL]->info.tes.es_info :
+                       &pipeline->shaders[MESA_SHADER_VERTEX]->info.vs.es_info;
+
        unsigned gs_num_invocations = MAX2(gs_info->gs.invocations, 1);
        bool uses_adjacency;
        switch(pCreateInfo->pInputAssemblyState->topology) {
@@ -1271,19 +1286,19 @@ static void calculate_gfx9_gs_info(const VkGraphicsPipelineCreateInfo *pCreateIn
        uint32_t gs_prims_per_subgroup = gs_prims;
        uint32_t gs_inst_prims_in_subgroup = gs_prims * gs_num_invocations;
        uint32_t max_prims_per_subgroup = gs_inst_prims_in_subgroup * gs_info->gs.vertices_out;
-       pipeline->graphics.gs.lds_size = align(esgs_lds_size, 128) / 128;
-       pipeline->graphics.gs.vgt_gs_onchip_cntl =
-                              S_028A44_ES_VERTS_PER_SUBGRP(es_verts_per_subgroup) |
-                              S_028A44_GS_PRIMS_PER_SUBGRP(gs_prims_per_subgroup) |
-                              S_028A44_GS_INST_PRIMS_IN_SUBGRP(gs_inst_prims_in_subgroup);
-       pipeline->graphics.gs.vgt_gs_max_prims_per_subgroup =
-                              S_028A94_MAX_PRIMS_PER_SUBGROUP(max_prims_per_subgroup);
-       pipeline->graphics.gs.vgt_esgs_ring_itemsize  = esgs_itemsize;
+       gs.lds_size = align(esgs_lds_size, 128) / 128;
+       gs.vgt_gs_onchip_cntl = S_028A44_ES_VERTS_PER_SUBGRP(es_verts_per_subgroup) |
+                               S_028A44_GS_PRIMS_PER_SUBGRP(gs_prims_per_subgroup) |
+                               S_028A44_GS_INST_PRIMS_IN_SUBGRP(gs_inst_prims_in_subgroup);
+       gs.vgt_gs_max_prims_per_subgroup = S_028A94_MAX_PRIMS_PER_SUBGROUP(max_prims_per_subgroup);
+       gs.vgt_esgs_ring_itemsize  = esgs_itemsize;
        assert(max_prims_per_subgroup <= max_out_prims);
+
+       return gs;
 }
 
 static void
-calculate_gs_ring_sizes(struct radv_pipeline *pipeline)
+calculate_gs_ring_sizes(struct radv_pipeline *pipeline, const struct radv_gs_state *gs)
 {
        struct radv_device *device = pipeline->device;
        unsigned num_se = device->physical_device->rad_info.max_se;
@@ -1294,20 +1309,13 @@ calculate_gs_ring_sizes(struct radv_pipeline *pipeline)
        /* The maximum size is 63.999 MB per SE. */
        unsigned max_size = ((unsigned)(63.999 * 1024 * 1024) & ~255) * num_se;
        struct ac_shader_variant_info *gs_info = &pipeline->shaders[MESA_SHADER_GEOMETRY]->info;
-       struct ac_es_output_info *es_info;
-       if (pipeline->device->physical_device->rad_info.chip_class >= GFX9) 
-               es_info = radv_pipeline_has_tess(pipeline) ? &gs_info->tes.es_info : &gs_info->vs.es_info;
-       else
-               es_info = radv_pipeline_has_tess(pipeline) ?
-                       &pipeline->shaders[MESA_SHADER_TESS_EVAL]->info.tes.es_info :
-                       &pipeline->shaders[MESA_SHADER_VERTEX]->info.vs.es_info;
 
        /* Calculate the minimum size. */
-       unsigned min_esgs_ring_size = align(es_info->esgs_itemsize * gs_vertex_reuse *
+       unsigned min_esgs_ring_size = align(gs->vgt_esgs_ring_itemsize * 4 * gs_vertex_reuse *
                                            wave_size, alignment);
        /* These are recommended sizes, not minimum sizes. */
        unsigned esgs_ring_size = max_gs_waves * 2 * wave_size *
-               es_info->esgs_itemsize * gs_info->gs.vertices_in;
+               gs->vgt_esgs_ring_itemsize * 4 * gs_info->gs.vertices_in;
        unsigned gsvs_ring_size = max_gs_waves * 2 * wave_size *
                gs_info->gs.max_gsvs_emit_size * 1; // no streams in VK (gs->max_gs_stream + 1);
 
@@ -1318,7 +1326,6 @@ calculate_gs_ring_sizes(struct radv_pipeline *pipeline)
        if (pipeline->device->physical_device->rad_info.chip_class <= VI)
                pipeline->graphics.esgs_ring_size = CLAMP(esgs_ring_size, min_esgs_ring_size, max_size);
 
-       pipeline->graphics.gs.vgt_esgs_ring_itemsize = es_info->esgs_itemsize / 4;
        pipeline->graphics.gsvs_ring_size = MIN2(gsvs_ring_size, max_size);
 }
 
@@ -2660,7 +2667,8 @@ radv_pipeline_generate_tess_shaders(struct radeon_winsys_cs *cs,
 
 static void
 radv_pipeline_generate_geometry_shader(struct radeon_winsys_cs *cs,
-                                      struct radv_pipeline *pipeline)
+                                      struct radv_pipeline *pipeline,
+                                      const struct radv_gs_state *gs_state)
 {
        struct radv_shader_variant *gs;
        uint64_t va;
@@ -2693,7 +2701,7 @@ radv_pipeline_generate_geometry_shader(struct radeon_winsys_cs *cs,
                               S_028B90_ENABLE(gs_num_invocations > 0));
 
        radeon_set_context_reg(cs, R_028AAC_VGT_ESGS_RING_ITEMSIZE,
-                              pipeline->graphics.gs.vgt_esgs_ring_itemsize);
+                              gs_state->vgt_esgs_ring_itemsize);
 
        va = radv_buffer_get_va(gs->bo) + gs->bo_offset;
 
@@ -2704,11 +2712,10 @@ radv_pipeline_generate_geometry_shader(struct radeon_winsys_cs *cs,
 
                radeon_set_sh_reg_seq(cs, R_00B228_SPI_SHADER_PGM_RSRC1_GS, 2);
                radeon_emit(cs, gs->rsrc1);
-               radeon_emit(cs, gs->rsrc2 |
-                                           S_00B22C_LDS_SIZE(pipeline->graphics.gs.lds_size));
+               radeon_emit(cs, gs->rsrc2 | S_00B22C_LDS_SIZE(gs_state->lds_size));
 
-               radeon_set_context_reg(cs, R_028A44_VGT_GS_ONCHIP_CNTL, pipeline->graphics.gs.vgt_gs_onchip_cntl);
-               radeon_set_context_reg(cs, R_028A94_VGT_GS_MAX_PRIMS_PER_SUBGROUP, pipeline->graphics.gs.vgt_gs_max_prims_per_subgroup);
+               radeon_set_context_reg(cs, R_028A44_VGT_GS_ONCHIP_CNTL, gs_state->vgt_gs_onchip_cntl);
+               radeon_set_context_reg(cs, R_028A94_VGT_GS_MAX_PRIMS_PER_SUBGROUP, gs_state->vgt_gs_max_prims_per_subgroup);
        } else {
                radeon_set_sh_reg_seq(cs, R_00B220_SPI_SHADER_PGM_LO_GS, 4);
                radeon_emit(cs, va >> 8);
@@ -2957,7 +2964,8 @@ radv_pipeline_generate_pm4(struct radv_pipeline *pipeline,
                            const VkGraphicsPipelineCreateInfo *pCreateInfo,
                            const struct radv_graphics_pipeline_create_info *extra,
                            const struct radv_blend_state *blend,
-                           const struct radv_tessellation_state *tess)
+                           const struct radv_tessellation_state *tess,
+                           const struct radv_gs_state *gs)
 {
        pipeline->cs.buf = malloc(4 * 256);
        pipeline->cs.max_dw = 256;
@@ -2969,7 +2977,7 @@ radv_pipeline_generate_pm4(struct radv_pipeline *pipeline,
        radv_pipeline_generate_vgt_gs_mode(&pipeline->cs, pipeline);
        radv_pipeline_generate_vertex_shader(&pipeline->cs, pipeline, tess);
        radv_pipeline_generate_tess_shaders(&pipeline->cs, pipeline, tess);
-       radv_pipeline_generate_geometry_shader(&pipeline->cs, pipeline);
+       radv_pipeline_generate_geometry_shader(&pipeline->cs, pipeline, gs);
        radv_pipeline_generate_fragment_shader(&pipeline->cs, pipeline);
        radv_pipeline_generate_ps_inputs(&pipeline->cs, pipeline);
        radv_pipeline_generate_vgt_vertex_reuse(&pipeline->cs, pipeline);
@@ -3207,10 +3215,10 @@ radv_pipeline_init(struct radv_pipeline *pipeline,
                }
        }
 
+       struct radv_gs_state gs = {0};
        if (radv_pipeline_has_gs(pipeline)) {
-               calculate_gs_ring_sizes(pipeline);
-               if (device->physical_device->rad_info.chip_class >= GFX9)
-                       calculate_gfx9_gs_info(pCreateInfo, pipeline);
+               gs = calculate_gs_info(pCreateInfo, pipeline);
+               calculate_gs_ring_sizes(pipeline, &gs);
        }
 
        struct radv_tessellation_state tess = {0};
@@ -3245,7 +3253,7 @@ radv_pipeline_init(struct radv_pipeline *pipeline,
        }
 
        result = radv_pipeline_scratch_init(device, pipeline);
-       radv_pipeline_generate_pm4(pipeline, pCreateInfo, extra, &blend, &tess);
+       radv_pipeline_generate_pm4(pipeline, pCreateInfo, extra, &blend, &tess, &gs);
 
        return result;
 }