return hash_flags;
}
-static VkResult
-radv_pipeline_scratch_init(struct radv_device *device,
+static void
+radv_pipeline_init_scratch(struct radv_device *device,
struct radv_pipeline *pipeline)
{
unsigned scratch_bytes_per_wave = 0;
pipeline->scratch_bytes_per_wave = scratch_bytes_per_wave;
pipeline->max_waves = max_waves;
- return VK_SUCCESS;
}
static uint32_t si_translate_blend_logic_op(VkLogicOp op)
return states;
}
+/* Precompute the IA_MULTI_VGT_PARAM register base value and the per-draw
+ * helper flags (primgroup size, partial ES/VS wave, switch-on-EOI) from the
+ * pipeline's active shader stages and the GPU family.  The result is cached
+ * in the pipeline so the command buffer path does not have to re-derive the
+ * hardware workarounds on every draw.
+ */
+static struct radv_ia_multi_vgt_param_helpers
+radv_compute_ia_multi_vgt_param_helpers(struct radv_pipeline *pipeline)
+{
+	struct radv_ia_multi_vgt_param_helpers ia_multi_vgt_param = {0};
+	const struct radv_device *device = pipeline->device;
+
+	/* With tessellation the primgroup size must match the number of TCS
+	 * patches per threadgroup; otherwise use fixed recommended sizes. */
+	if (radv_pipeline_has_tess(pipeline))
+		ia_multi_vgt_param.primgroup_size = pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.tcs.num_patches;
+	else if (radv_pipeline_has_gs(pipeline))
+		ia_multi_vgt_param.primgroup_size = 64;
+	else
+		ia_multi_vgt_param.primgroup_size = 128; /* recommended without a GS */
+
+	/* GS requirement. */
+	ia_multi_vgt_param.partial_es_wave = false;
+	if (radv_pipeline_has_gs(pipeline) && device->physical_device->rad_info.chip_class <= GFX8)
+		if (SI_GS_PER_ES / ia_multi_vgt_param.primgroup_size >= pipeline->device->gs_table_depth - 3)
+			ia_multi_vgt_param.partial_es_wave = true;
+
+	/* SWITCH_ON_EOI must be set whenever any active stage consumes the
+	 * primitive ID (FS input, GS, or either tessellation stage). */
+	ia_multi_vgt_param.ia_switch_on_eoi = false;
+	if (pipeline->shaders[MESA_SHADER_FRAGMENT]->info.ps.prim_id_input)
+		ia_multi_vgt_param.ia_switch_on_eoi = true;
+	if (radv_pipeline_has_gs(pipeline) &&
+	    pipeline->shaders[MESA_SHADER_GEOMETRY]->info.uses_prim_id)
+		ia_multi_vgt_param.ia_switch_on_eoi = true;
+	if (radv_pipeline_has_tess(pipeline)) {
+		/* SWITCH_ON_EOI must be set if PrimID is used. */
+		if (pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.uses_prim_id ||
+		    radv_get_shader(pipeline, MESA_SHADER_TESS_EVAL)->info.uses_prim_id)
+			ia_multi_vgt_param.ia_switch_on_eoi = true;
+	}
+
+	ia_multi_vgt_param.partial_vs_wave = false;
+	if (radv_pipeline_has_tess(pipeline)) {
+		/* Bug with tessellation and GS on Bonaire and older 2 SE chips. */
+		if ((device->physical_device->rad_info.family == CHIP_TAHITI ||
+		     device->physical_device->rad_info.family == CHIP_PITCAIRN ||
+		     device->physical_device->rad_info.family == CHIP_BONAIRE) &&
+		    radv_pipeline_has_gs(pipeline))
+			ia_multi_vgt_param.partial_vs_wave = true;
+		/* Needed for 028B6C_DISTRIBUTION_MODE != 0 */
+		if (device->physical_device->rad_info.has_distributed_tess) {
+			if (radv_pipeline_has_gs(pipeline)) {
+				if (device->physical_device->rad_info.chip_class <= GFX8)
+					ia_multi_vgt_param.partial_es_wave = true;
+			} else {
+				ia_multi_vgt_param.partial_vs_wave = true;
+			}
+		}
+	}
+
+	if (radv_pipeline_has_gs(pipeline)) {
+		/* On these chips there is the possibility of a hang if the
+		 * pipeline uses a GS and partial_vs_wave is not set.
+		 *
+		 * This mostly does not hit 4-SE chips, as those typically set
+		 * ia_switch_on_eoi and then partial_vs_wave is set for pipelines
+		 * with GS due to another workaround.
+		 *
+		 * Reproducer: https://bugs.freedesktop.org/show_bug.cgi?id=109242
+		 */
+		if (device->physical_device->rad_info.family == CHIP_TONGA ||
+		    device->physical_device->rad_info.family == CHIP_FIJI ||
+		    device->physical_device->rad_info.family == CHIP_POLARIS10 ||
+		    device->physical_device->rad_info.family == CHIP_POLARIS11 ||
+		    device->physical_device->rad_info.family == CHIP_POLARIS12 ||
+		    device->physical_device->rad_info.family == CHIP_VEGAM) {
+			ia_multi_vgt_param.partial_vs_wave = true;
+		}
+	}
+
+	ia_multi_vgt_param.base =
+		S_028AA8_PRIMGROUP_SIZE(ia_multi_vgt_param.primgroup_size - 1) |
+		/* The following field was moved to VGT_SHADER_STAGES_EN in GFX9. */
+		S_028AA8_MAX_PRIMGRP_IN_WAVE(device->physical_device->rad_info.chip_class == GFX8 ? 2 : 0) |
+		S_030960_EN_INST_OPT_BASIC(device->physical_device->rad_info.chip_class >= GFX9) |
+		S_030960_EN_INST_OPT_ADV(device->physical_device->rad_info.chip_class >= GFX9);
+
+	return ia_multi_vgt_param;
+}
+
static void
radv_pipeline_init_input_assembly_state(struct radv_pipeline *pipeline,
const VkGraphicsPipelineCreateInfo *pCreateInfo,
if (extra && extra->use_rectlist) {
pipeline->graphics.can_use_guardband = true;
}
+
+ pipeline->graphics.ia_multi_vgt_param =
+ radv_compute_ia_multi_vgt_param_helpers(pipeline);
}
static void
pipeline->dynamic_state.mask = states;
}
+/* Precompute PA_SU_SC_MODE_CNTL from the rasterization create info:
+ * front-face winding, face culling, polygon fill mode and the three
+ * depth-bias (polygon offset) enables.  The value is cached in
+ * pipeline->graphics for later command-stream emission.
+ * NOTE(review): assumes pCreateInfo->pRasterizationState is non-NULL,
+ * as the Vulkan spec requires for graphics pipelines.
+ */
+static void
+radv_pipeline_init_raster_state(struct radv_pipeline *pipeline,
+				const VkGraphicsPipelineCreateInfo *pCreateInfo)
+{
+	const VkPipelineRasterizationStateCreateInfo *raster_info =
+		pCreateInfo->pRasterizationState;
+
+	pipeline->graphics.pa_su_sc_mode_cntl =
+		S_028814_FACE(raster_info->frontFace) |
+		S_028814_CULL_FRONT(!!(raster_info->cullMode & VK_CULL_MODE_FRONT_BIT)) |
+		S_028814_CULL_BACK(!!(raster_info->cullMode & VK_CULL_MODE_BACK_BIT)) |
+		S_028814_POLY_MODE(raster_info->polygonMode != VK_POLYGON_MODE_FILL) |
+		S_028814_POLYMODE_FRONT_PTYPE(si_translate_fill(raster_info->polygonMode)) |
+		S_028814_POLYMODE_BACK_PTYPE(si_translate_fill(raster_info->polygonMode)) |
+		S_028814_POLY_OFFSET_FRONT_ENABLE(raster_info->depthBiasEnable ? 1 : 0) |
+		S_028814_POLY_OFFSET_BACK_ENABLE(raster_info->depthBiasEnable ? 1 : 0) |
+		S_028814_POLY_OFFSET_PARA_ENABLE(raster_info->depthBiasEnable ? 1 : 0);
+}
+
+/* Precompute DB_DEPTH_CONTROL from the depth/stencil create info and the
+ * subpass attachments.  Depth test/write/bounds bits are only set when the
+ * subpass actually has a depth attachment, and the stencil bits only when
+ * it has a stencil attachment with stencil testing enabled; otherwise the
+ * register stays 0.  The result is cached in pipeline->graphics.
+ */
+static void
+radv_pipeline_init_depth_stencil_state(struct radv_pipeline *pipeline,
+				       const VkGraphicsPipelineCreateInfo *pCreateInfo)
+{
+	const VkPipelineDepthStencilStateCreateInfo *ds_info
+		= radv_pipeline_get_depth_stencil_state(pCreateInfo);
+	RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
+	struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass;
+	struct radv_render_pass_attachment *attachment = NULL;
+	uint32_t db_depth_control = 0;
+
+	if (subpass->depth_stencil_attachment)
+		attachment = pass->attachments + subpass->depth_stencil_attachment->attachment;
+
+	bool has_depth_attachment = attachment && vk_format_is_depth(attachment->format);
+	bool has_stencil_attachment = attachment && vk_format_is_stencil(attachment->format);
+
+	if (ds_info) {
+		if (has_depth_attachment) {
+			db_depth_control = S_028800_Z_ENABLE(ds_info->depthTestEnable ? 1 : 0) |
+			                   S_028800_Z_WRITE_ENABLE(ds_info->depthWriteEnable ? 1 : 0) |
+			                   S_028800_ZFUNC(ds_info->depthCompareOp) |
+			                   S_028800_DEPTH_BOUNDS_ENABLE(ds_info->depthBoundsTestEnable ? 1 : 0);
+		}
+
+		if (has_stencil_attachment && ds_info->stencilTestEnable) {
+			db_depth_control |= S_028800_STENCIL_ENABLE(1) | S_028800_BACKFACE_ENABLE(1);
+			db_depth_control |= S_028800_STENCILFUNC(ds_info->front.compareOp);
+			db_depth_control |= S_028800_STENCILFUNC_BF(ds_info->back.compareOp);
+		}
+	}
+
+	pipeline->graphics.db_depth_control = db_depth_control;
+}
+
static void
gfx9_get_gs_info(const struct radv_pipeline_key *key,
const struct radv_pipeline *pipeline,
unsigned gsprim_lds_size = 0;
/* All these are per subgroup: */
+ const unsigned min_esverts = pipeline->device->physical_device->rad_info.chip_class >= GFX10_3 ? 29 : 24;
bool max_vert_out_per_gs_instance = false;
unsigned max_esverts_base = 256;
unsigned max_gsprims_base = 128; /* default prim group size clamp */
}
/* Hardware restriction: minimum value of max_esverts */
- max_esverts = MAX2(max_esverts, 23 + max_verts_per_prim);
+ max_esverts = MAX2(max_esverts, min_esverts - 1 + max_verts_per_prim);
unsigned max_out_vertices =
max_vert_out_per_gs_instance ? gs_info->gs.vertices_out :
pipeline->graphics.esgs_ring_size = ngg->esgs_ring_size;
- assert(ngg->hw_max_esverts >= 24); /* HW limitation */
+ assert(ngg->hw_max_esverts >= min_esverts); /* HW limitation */
}
static void
-calculate_gs_ring_sizes(struct radv_pipeline *pipeline,
- const struct gfx9_gs_info *gs)
+radv_pipeline_init_gs_ring_state(struct radv_pipeline *pipeline,
+ const struct gfx9_gs_info *gs)
{
struct radv_device *device = pipeline->device;
unsigned num_se = device->physical_device->rad_info.max_se;
}
struct radv_shader_variant *
-radv_get_shader(struct radv_pipeline *pipeline,
+radv_get_shader(const struct radv_pipeline *pipeline,
gl_shader_stage stage)
{
if (stage == MESA_SHADER_VERTEX) {
pipeline->layout,
&keys[MESA_SHADER_FRAGMENT],
&infos[MESA_SHADER_FRAGMENT],
- pipeline->device->physical_device->use_llvm);
+ radv_use_llvm_for_stage(pipeline->device, MESA_SHADER_FRAGMENT));
/* TODO: These are no longer used as keys we should refactor this */
keys[MESA_SHADER_VERTEX].vs_common_out.export_prim_id =
radv_nir_shader_info_pass(combined_nir[i],
pipeline->layout, &key,
&infos[MESA_SHADER_TESS_CTRL],
- pipeline->device->physical_device->use_llvm);
+ radv_use_llvm_for_stage(pipeline->device, MESA_SHADER_TESS_CTRL));
}
keys[MESA_SHADER_TESS_EVAL].tes.num_patches =
pipeline->layout,
&keys[pre_stage],
&infos[MESA_SHADER_GEOMETRY],
- pipeline->device->physical_device->use_llvm);
+ radv_use_llvm_for_stage(pipeline->device, MESA_SHADER_GEOMETRY));
}
filled_stages |= (1 << pre_stage);
radv_nir_shader_info_init(&infos[i]);
radv_nir_shader_info_pass(nir[i], pipeline->layout,
- &keys[i], &infos[i], pipeline->device->physical_device->use_llvm);
+ &keys[i], &infos[i],
+ radv_use_llvm_for_stage(pipeline->device, i));
}
for (int i = 0; i < MESA_SHADER_STAGES; i++) {
/* do this again since information such as outputs_read can be out-of-date */
nir_shader_gather_info(nir[i], nir_shader_get_entrypoint(nir[i]));
- if (device->physical_device->use_llvm) {
+ if (radv_use_llvm_for_stage(device, i)) {
NIR_PASS_V(nir[i], nir_lower_bool_to_int32);
} else {
NIR_PASS_V(nir[i], nir_lower_non_uniform_access,
nir_lower_non_uniform_texture_access |
nir_lower_non_uniform_image_access);
}
+ NIR_PASS_V(nir[i], nir_lower_memory_model);
}
}
radv_nir_shader_info_pass(nir[MESA_SHADER_GEOMETRY],
pipeline->layout, &key,
- &info, pipeline->device->physical_device->use_llvm);
+ &info,
+ radv_use_llvm_for_stage(pipeline->device, MESA_SHADER_GEOMETRY));
info.wave_size = 64; /* Wave32 not supported. */
info.ballot_bit_size = 64;
if (nir[i]) {
ralloc_free(nir[i]);
- if (radv_can_dump_shader_stats(device, modules[i]))
- radv_shader_dump_stats(device,
- pipeline->shaders[i],
- i, stderr);
+ if (radv_can_dump_shader_stats(device, modules[i])) {
+ radv_dump_shader_stats(device, pipeline, i, stderr);
+ }
}
}
}
static void
-radv_pipeline_generate_disabled_binning_state(struct radeon_cmdbuf *ctx_cs,
- struct radv_pipeline *pipeline,
- const VkGraphicsPipelineCreateInfo *pCreateInfo)
+radv_pipeline_init_disabled_binning_state(struct radv_pipeline *pipeline,
+ const VkGraphicsPipelineCreateInfo *pCreateInfo)
{
uint32_t pa_sc_binner_cntl_0 =
S_028C44_BINNING_MODE(V_028C44_DISABLE_BINNING_USE_LEGACY_SC) |
}
static void
-radv_pipeline_generate_binning_state(struct radeon_cmdbuf *ctx_cs,
- struct radv_pipeline *pipeline,
- const VkGraphicsPipelineCreateInfo *pCreateInfo,
- const struct radv_blend_state *blend)
+radv_pipeline_init_binning_state(struct radv_pipeline *pipeline,
+ const VkGraphicsPipelineCreateInfo *pCreateInfo,
+ const struct radv_blend_state *blend)
{
if (pipeline->device->physical_device->rad_info.chip_class < GFX9)
return;
pipeline->graphics.binning.pa_sc_binner_cntl_0 = pa_sc_binner_cntl_0;
pipeline->graphics.binning.db_dfsm_control = db_dfsm_control;
} else
- radv_pipeline_generate_disabled_binning_state(ctx_cs, pipeline, pCreateInfo);
+ radv_pipeline_init_disabled_binning_state(pipeline, pCreateInfo);
}
static void
radv_pipeline_generate_depth_stencil_state(struct radeon_cmdbuf *ctx_cs,
- struct radv_pipeline *pipeline,
+ const struct radv_pipeline *pipeline,
const VkGraphicsPipelineCreateInfo *pCreateInfo,
const struct radv_graphics_pipeline_create_info *extra)
{
struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass;
struct radv_shader_variant *ps = pipeline->shaders[MESA_SHADER_FRAGMENT];
struct radv_render_pass_attachment *attachment = NULL;
- uint32_t db_depth_control = 0;
uint32_t db_render_control = 0, db_render_override2 = 0;
uint32_t db_render_override = 0;
attachment = pass->attachments + subpass->depth_stencil_attachment->attachment;
bool has_depth_attachment = attachment && vk_format_is_depth(attachment->format);
- bool has_stencil_attachment = attachment && vk_format_is_stencil(attachment->format);
if (vkds && has_depth_attachment) {
- db_depth_control = S_028800_Z_ENABLE(vkds->depthTestEnable ? 1 : 0) |
- S_028800_Z_WRITE_ENABLE(vkds->depthWriteEnable ? 1 : 0) |
- S_028800_ZFUNC(vkds->depthCompareOp) |
- S_028800_DEPTH_BOUNDS_ENABLE(vkds->depthBoundsTestEnable ? 1 : 0);
-
/* from amdvlk: For 4xAA and 8xAA need to decompress on flush for better performance */
db_render_override2 |= S_028010_DECOMPRESS_Z_ON_FLUSH(attachment->samples > 2);
db_render_override2 |= S_028010_CENTROID_COMPUTATION_MODE_GFX103(2);
}
- if (has_stencil_attachment && vkds && vkds->stencilTestEnable) {
- db_depth_control |= S_028800_STENCIL_ENABLE(1) | S_028800_BACKFACE_ENABLE(1);
- db_depth_control |= S_028800_STENCILFUNC(vkds->front.compareOp);
-
- db_depth_control |= S_028800_STENCILFUNC_BF(vkds->back.compareOp);
- }
-
if (attachment && extra) {
db_render_control |= S_028000_DEPTH_CLEAR_ENABLE(extra->db_depth_clear);
db_render_control |= S_028000_STENCIL_CLEAR_ENABLE(extra->db_stencil_clear);
radeon_set_context_reg(ctx_cs, R_028000_DB_RENDER_CONTROL, db_render_control);
radeon_set_context_reg(ctx_cs, R_02800C_DB_RENDER_OVERRIDE, db_render_override);
radeon_set_context_reg(ctx_cs, R_028010_DB_RENDER_OVERRIDE2, db_render_override2);
-
- pipeline->graphics.db_depth_control = db_depth_control;
}
static void
radv_pipeline_generate_blend_state(struct radeon_cmdbuf *ctx_cs,
- struct radv_pipeline *pipeline,
+ const struct radv_pipeline *pipeline,
const struct radv_blend_state *blend)
{
radeon_set_context_reg_seq(ctx_cs, R_028780_CB_BLEND0_CONTROL, 8);
radeon_set_context_reg(ctx_cs, R_028238_CB_TARGET_MASK, blend->cb_target_mask);
radeon_set_context_reg(ctx_cs, R_02823C_CB_SHADER_MASK, blend->cb_shader_mask);
-
- pipeline->graphics.col_format = blend->spi_shader_col_format;
- pipeline->graphics.cb_target_mask = blend->cb_target_mask;
}
static void
radv_pipeline_generate_raster_state(struct radeon_cmdbuf *ctx_cs,
- struct radv_pipeline *pipeline,
+ const struct radv_pipeline *pipeline,
const VkGraphicsPipelineCreateInfo *pCreateInfo)
{
const VkPipelineRasterizationStateCreateInfo *vkraster = pCreateInfo->pRasterizationState;
S_028810_DX_RASTERIZATION_KILL(vkraster->rasterizerDiscardEnable ? 1 : 0) |
S_028810_DX_LINEAR_ATTR_CLIP_ENA(1));
- pipeline->graphics.pa_su_sc_mode_cntl =
- S_028814_FACE(vkraster->frontFace) |
- S_028814_CULL_FRONT(!!(vkraster->cullMode & VK_CULL_MODE_FRONT_BIT)) |
- S_028814_CULL_BACK(!!(vkraster->cullMode & VK_CULL_MODE_BACK_BIT)) |
- S_028814_POLY_MODE(vkraster->polygonMode != VK_POLYGON_MODE_FILL) |
- S_028814_POLYMODE_FRONT_PTYPE(si_translate_fill(vkraster->polygonMode)) |
- S_028814_POLYMODE_BACK_PTYPE(si_translate_fill(vkraster->polygonMode)) |
- S_028814_POLY_OFFSET_FRONT_ENABLE(vkraster->depthBiasEnable ? 1 : 0) |
- S_028814_POLY_OFFSET_BACK_ENABLE(vkraster->depthBiasEnable ? 1 : 0) |
- S_028814_POLY_OFFSET_PARA_ENABLE(vkraster->depthBiasEnable ? 1 : 0);
-
radeon_set_context_reg(ctx_cs, R_028BDC_PA_SC_LINE_CNTL,
S_028BDC_DX10_DIAMOND_TEST_ENA(1));
static void
radv_pipeline_generate_multisample_state(struct radeon_cmdbuf *ctx_cs,
- struct radv_pipeline *pipeline)
+ const struct radv_pipeline *pipeline)
{
- struct radv_multisample_state *ms = &pipeline->graphics.ms;
+ const struct radv_multisample_state *ms = &pipeline->graphics.ms;
radeon_set_context_reg_seq(ctx_cs, R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 2);
radeon_emit(ctx_cs, ms->pa_sc_aa_mask[0]);
static void
radv_pipeline_generate_vgt_gs_mode(struct radeon_cmdbuf *ctx_cs,
- struct radv_pipeline *pipeline)
+ const struct radv_pipeline *pipeline)
{
const struct radv_vs_output_info *outinfo = get_vs_output_info(pipeline);
const struct radv_shader_variant *vs =
static void
radv_pipeline_generate_hw_vs(struct radeon_cmdbuf *ctx_cs,
struct radeon_cmdbuf *cs,
- struct radv_pipeline *pipeline,
- struct radv_shader_variant *shader)
+ const struct radv_pipeline *pipeline,
+ const struct radv_shader_variant *shader)
{
uint64_t va = radv_buffer_get_va(shader->bo) + shader->bo_offset;
S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0f) != 0) |
S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xf0) != 0) |
S_02881C_BYPASS_PRIM_RATE_COMBINER_GFX103(pipeline->device->physical_device->rad_info.chip_class >= GFX10_3) |
+ S_02881C_BYPASS_VTX_RATE_COMBINER_GFX103(pipeline->device->physical_device->rad_info.chip_class >= GFX10_3) |
cull_dist_mask << 8 |
clip_dist_mask);
static void
radv_pipeline_generate_hw_es(struct radeon_cmdbuf *cs,
- struct radv_pipeline *pipeline,
- struct radv_shader_variant *shader)
+ const struct radv_pipeline *pipeline,
+ const struct radv_shader_variant *shader)
{
uint64_t va = radv_buffer_get_va(shader->bo) + shader->bo_offset;
static void
radv_pipeline_generate_hw_ls(struct radeon_cmdbuf *cs,
- struct radv_pipeline *pipeline,
- struct radv_shader_variant *shader)
+ const struct radv_pipeline *pipeline,
+ const struct radv_shader_variant *shader)
{
unsigned num_lds_blocks = pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.tcs.num_lds_blocks;
uint64_t va = radv_buffer_get_va(shader->bo) + shader->bo_offset;
static void
radv_pipeline_generate_hw_ngg(struct radeon_cmdbuf *ctx_cs,
struct radeon_cmdbuf *cs,
- struct radv_pipeline *pipeline,
- struct radv_shader_variant *shader)
+ const struct radv_pipeline *pipeline,
+ const struct radv_shader_variant *shader)
{
uint64_t va = radv_buffer_get_va(shader->bo) + shader->bo_offset;
gl_shader_stage es_type =
S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0f) != 0) |
S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xf0) != 0) |
S_02881C_BYPASS_PRIM_RATE_COMBINER_GFX103(pipeline->device->physical_device->rad_info.chip_class >= GFX10_3) |
+ S_02881C_BYPASS_VTX_RATE_COMBINER_GFX103(pipeline->device->physical_device->rad_info.chip_class >= GFX10_3) |
cull_dist_mask << 8 |
clip_dist_mask);
static void
radv_pipeline_generate_hw_hs(struct radeon_cmdbuf *cs,
- struct radv_pipeline *pipeline,
- struct radv_shader_variant *shader)
+ const struct radv_pipeline *pipeline,
+ const struct radv_shader_variant *shader)
{
uint64_t va = radv_buffer_get_va(shader->bo) + shader->bo_offset;
static void
radv_pipeline_generate_vertex_shader(struct radeon_cmdbuf *ctx_cs,
struct radeon_cmdbuf *cs,
- struct radv_pipeline *pipeline)
+ const struct radv_pipeline *pipeline)
{
struct radv_shader_variant *vs;
static void
radv_pipeline_generate_tess_shaders(struct radeon_cmdbuf *ctx_cs,
struct radeon_cmdbuf *cs,
- struct radv_pipeline *pipeline)
+ const struct radv_pipeline *pipeline)
{
struct radv_shader_variant *tes, *tcs;
static void
radv_pipeline_generate_tess_state(struct radeon_cmdbuf *ctx_cs,
- struct radv_pipeline *pipeline,
+ const struct radv_pipeline *pipeline,
const VkGraphicsPipelineCreateInfo *pCreateInfo)
{
struct radv_shader_variant *tes = radv_get_shader(pipeline, MESA_SHADER_TESS_EVAL);
static void
radv_pipeline_generate_hw_gs(struct radeon_cmdbuf *ctx_cs,
struct radeon_cmdbuf *cs,
- struct radv_pipeline *pipeline,
- struct radv_shader_variant *gs)
+ const struct radv_pipeline *pipeline,
+ const struct radv_shader_variant *gs)
{
const struct gfx9_gs_info *gs_state = &gs->info.gs_ring_info;
unsigned gs_max_out_vertices;
- uint8_t *num_components;
+ const uint8_t *num_components;
uint8_t max_stream;
unsigned offset;
uint64_t va;
static void
radv_pipeline_generate_geometry_shader(struct radeon_cmdbuf *ctx_cs,
struct radeon_cmdbuf *cs,
- struct radv_pipeline *pipeline)
+ const struct radv_pipeline *pipeline)
{
struct radv_shader_variant *gs;
static void
radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf *ctx_cs,
- struct radv_pipeline *pipeline)
+ const struct radv_pipeline *pipeline)
{
struct radv_shader_variant *ps = pipeline->shaders[MESA_SHADER_FRAGMENT];
const struct radv_vs_output_info *outinfo = get_vs_output_info(pipeline);
static void
radv_pipeline_generate_vgt_vertex_reuse(struct radeon_cmdbuf *ctx_cs,
- struct radv_pipeline *pipeline)
+ const struct radv_pipeline *pipeline)
{
if (pipeline->device->physical_device->rad_info.family < CHIP_POLARIS10 ||
pipeline->device->physical_device->rad_info.chip_class >= GFX10)
static void
radv_pipeline_generate_vgt_gs_out(struct radeon_cmdbuf *ctx_cs,
- struct radv_pipeline *pipeline,
+ const struct radv_pipeline *pipeline,
const VkGraphicsPipelineCreateInfo *pCreateInfo,
const struct radv_graphics_pipeline_create_info *extra)
{
radv_pipeline_generate_fragment_shader(ctx_cs, cs, pipeline);
radv_pipeline_generate_ps_inputs(ctx_cs, pipeline);
radv_pipeline_generate_vgt_vertex_reuse(ctx_cs, pipeline);
- radv_pipeline_generate_binning_state(ctx_cs, pipeline, pCreateInfo, blend);
radv_pipeline_generate_vgt_shader_config(ctx_cs, pipeline);
radv_pipeline_generate_cliprect_rule(ctx_cs, pCreateInfo);
radv_pipeline_generate_vgt_gs_out(ctx_cs, pipeline, pCreateInfo, extra);
assert(cs->cdw <= cs->max_dw);
}
-static struct radv_ia_multi_vgt_param_helpers
-radv_compute_ia_multi_vgt_param_helpers(struct radv_pipeline *pipeline)
-{
- struct radv_ia_multi_vgt_param_helpers ia_multi_vgt_param = {0};
- const struct radv_device *device = pipeline->device;
-
- if (radv_pipeline_has_tess(pipeline))
- ia_multi_vgt_param.primgroup_size = pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.tcs.num_patches;
- else if (radv_pipeline_has_gs(pipeline))
- ia_multi_vgt_param.primgroup_size = 64;
- else
- ia_multi_vgt_param.primgroup_size = 128; /* recommended without a GS */
-
- /* GS requirement. */
- ia_multi_vgt_param.partial_es_wave = false;
- if (radv_pipeline_has_gs(pipeline) && device->physical_device->rad_info.chip_class <= GFX8)
- if (SI_GS_PER_ES / ia_multi_vgt_param.primgroup_size >= pipeline->device->gs_table_depth - 3)
- ia_multi_vgt_param.partial_es_wave = true;
-
- ia_multi_vgt_param.ia_switch_on_eoi = false;
- if (pipeline->shaders[MESA_SHADER_FRAGMENT]->info.ps.prim_id_input)
- ia_multi_vgt_param.ia_switch_on_eoi = true;
- if (radv_pipeline_has_gs(pipeline) &&
- pipeline->shaders[MESA_SHADER_GEOMETRY]->info.uses_prim_id)
- ia_multi_vgt_param.ia_switch_on_eoi = true;
- if (radv_pipeline_has_tess(pipeline)) {
- /* SWITCH_ON_EOI must be set if PrimID is used. */
- if (pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.uses_prim_id ||
- radv_get_shader(pipeline, MESA_SHADER_TESS_EVAL)->info.uses_prim_id)
- ia_multi_vgt_param.ia_switch_on_eoi = true;
- }
-
- ia_multi_vgt_param.partial_vs_wave = false;
- if (radv_pipeline_has_tess(pipeline)) {
- /* Bug with tessellation and GS on Bonaire and older 2 SE chips. */
- if ((device->physical_device->rad_info.family == CHIP_TAHITI ||
- device->physical_device->rad_info.family == CHIP_PITCAIRN ||
- device->physical_device->rad_info.family == CHIP_BONAIRE) &&
- radv_pipeline_has_gs(pipeline))
- ia_multi_vgt_param.partial_vs_wave = true;
- /* Needed for 028B6C_DISTRIBUTION_MODE != 0 */
- if (device->physical_device->rad_info.has_distributed_tess) {
- if (radv_pipeline_has_gs(pipeline)) {
- if (device->physical_device->rad_info.chip_class <= GFX8)
- ia_multi_vgt_param.partial_es_wave = true;
- } else {
- ia_multi_vgt_param.partial_vs_wave = true;
- }
- }
- }
-
- if (radv_pipeline_has_gs(pipeline)) {
- /* On these chips there is the possibility of a hang if the
- * pipeline uses a GS and partial_vs_wave is not set.
- *
- * This mostly does not hit 4-SE chips, as those typically set
- * ia_switch_on_eoi and then partial_vs_wave is set for pipelines
- * with GS due to another workaround.
- *
- * Reproducer: https://bugs.freedesktop.org/show_bug.cgi?id=109242
- */
- if (device->physical_device->rad_info.family == CHIP_TONGA ||
- device->physical_device->rad_info.family == CHIP_FIJI ||
- device->physical_device->rad_info.family == CHIP_POLARIS10 ||
- device->physical_device->rad_info.family == CHIP_POLARIS11 ||
- device->physical_device->rad_info.family == CHIP_POLARIS12 ||
- device->physical_device->rad_info.family == CHIP_VEGAM) {
- ia_multi_vgt_param.partial_vs_wave = true;
- }
- }
-
- ia_multi_vgt_param.base =
- S_028AA8_PRIMGROUP_SIZE(ia_multi_vgt_param.primgroup_size - 1) |
- /* The following field was moved to VGT_SHADER_STAGES_EN in GFX9. */
- S_028AA8_MAX_PRIMGRP_IN_WAVE(device->physical_device->rad_info.chip_class == GFX8 ? 2 : 0) |
- S_030960_EN_INST_OPT_BASIC(device->physical_device->rad_info.chip_class >= GFX9) |
- S_030960_EN_INST_OPT_ADV(device->physical_device->rad_info.chip_class >= GFX9);
-
- return ia_multi_vgt_param;
-}
-
-
static void
-radv_compute_vertex_input_state(struct radv_pipeline *pipeline,
- const VkGraphicsPipelineCreateInfo *pCreateInfo)
+radv_pipeline_init_vertex_input_state(struct radv_pipeline *pipeline,
+ const VkGraphicsPipelineCreateInfo *pCreateInfo)
{
const VkPipelineVertexInputStateCreateInfo *vi_info =
pCreateInfo->pVertexInputState;
return NULL;
}
+/* Initialize per-stage pipeline state derived from the compiled shaders:
+ * the user_data_0 SGPR base for each stage, whether any stage needs
+ * indirect descriptor sets, and the vertex base-vertex/start-instance
+ * SGPR location (vtx_base_sgpr / vtx_emit_num) used by the draw path.
+ */
+static void
+radv_pipeline_init_shader_stages_state(struct radv_pipeline *pipeline)
+{
+	struct radv_device *device = pipeline->device;
+
+	for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
+		pipeline->user_data_0[i] =
+			radv_pipeline_stage_to_user_data_0(pipeline, i,
+							   device->physical_device->rad_info.chip_class);
+
+		if (pipeline->shaders[i]) {
+			pipeline->need_indirect_descriptor_sets |= pipeline->shaders[i]->info.need_indirect_descriptor_sets;
+		}
+	}
+
+	struct radv_userdata_info *loc = radv_lookup_user_sgpr(pipeline, MESA_SHADER_VERTEX,
+							       AC_UD_VS_BASE_VERTEX_START_INSTANCE);
+	if (loc->sgpr_idx != -1) {
+		pipeline->graphics.vtx_base_sgpr = pipeline->user_data_0[MESA_SHADER_VERTEX];
+		pipeline->graphics.vtx_base_sgpr += loc->sgpr_idx * 4;
+		/* Emit 3 user SGPRs when the VS consumes gl_DrawID, else 2
+		 * (base vertex + start instance). */
+		if (radv_get_shader(pipeline, MESA_SHADER_VERTEX)->info.vs.needs_draw_id)
+			pipeline->graphics.vtx_emit_num = 3;
+		else
+			pipeline->graphics.vtx_emit_num = 2;
+	}
+}
+
static VkResult
radv_pipeline_init(struct radv_pipeline *pipeline,
struct radv_device *device,
radv_pipeline_init_multisample_state(pipeline, &blend, pCreateInfo);
radv_pipeline_init_input_assembly_state(pipeline, pCreateInfo, extra);
radv_pipeline_init_dynamic_state(pipeline, pCreateInfo, extra);
+ radv_pipeline_init_raster_state(pipeline, pCreateInfo);
+ radv_pipeline_init_depth_stencil_state(pipeline, pCreateInfo);
/* Ensure that some export memory is always allocated, for two reasons:
*
blend.cb_shader_mask = 0xf;
}
- for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
- if (pipeline->shaders[i]) {
- pipeline->need_indirect_descriptor_sets |= pipeline->shaders[i]->info.need_indirect_descriptor_sets;
- }
- }
+ pipeline->graphics.col_format = blend.spi_shader_col_format;
+ pipeline->graphics.cb_target_mask = blend.cb_target_mask;
if (radv_pipeline_has_gs(pipeline) && !radv_pipeline_has_ngg(pipeline)) {
struct radv_shader_variant *gs =
pipeline->shaders[MESA_SHADER_GEOMETRY];
- calculate_gs_ring_sizes(pipeline, &gs->info.gs_ring_info);
+ radv_pipeline_init_gs_ring_state(pipeline, &gs->info.gs_ring_info);
}
if (radv_pipeline_has_tess(pipeline)) {
pCreateInfo->pTessellationState->patchControlPoints;
}
- pipeline->graphics.ia_multi_vgt_param = radv_compute_ia_multi_vgt_param_helpers(pipeline);
-
- radv_compute_vertex_input_state(pipeline, pCreateInfo);
-
- for (uint32_t i = 0; i < MESA_SHADER_STAGES; i++)
- pipeline->user_data_0[i] = radv_pipeline_stage_to_user_data_0(pipeline, i, device->physical_device->rad_info.chip_class);
-
- struct radv_userdata_info *loc = radv_lookup_user_sgpr(pipeline, MESA_SHADER_VERTEX,
- AC_UD_VS_BASE_VERTEX_START_INSTANCE);
- if (loc->sgpr_idx != -1) {
- pipeline->graphics.vtx_base_sgpr = pipeline->user_data_0[MESA_SHADER_VERTEX];
- pipeline->graphics.vtx_base_sgpr += loc->sgpr_idx * 4;
- if (radv_get_shader(pipeline, MESA_SHADER_VERTEX)->info.vs.needs_draw_id)
- pipeline->graphics.vtx_emit_num = 3;
- else
- pipeline->graphics.vtx_emit_num = 2;
- }
+ radv_pipeline_init_vertex_input_state(pipeline, pCreateInfo);
+ radv_pipeline_init_binning_state(pipeline, pCreateInfo, &blend);
+ radv_pipeline_init_shader_stages_state(pipeline);
+ radv_pipeline_init_scratch(device, pipeline);
/* Find the last vertex shader stage that eventually uses streamout. */
pipeline->streamout_shader = radv_pipeline_get_streamout_shader(pipeline);
- result = radv_pipeline_scratch_init(device, pipeline);
radv_pipeline_generate_pm4(pipeline, pCreateInfo, extra, &blend);
return result;
static void
radv_pipeline_generate_hw_cs(struct radeon_cmdbuf *cs,
- struct radv_pipeline *pipeline)
+ const struct radv_pipeline *pipeline)
{
struct radv_shader_variant *shader = pipeline->shaders[MESA_SHADER_COMPUTE];
uint64_t va = radv_buffer_get_va(shader->bo) + shader->bo_offset;
static void
radv_pipeline_generate_compute_state(struct radeon_cmdbuf *cs,
- struct radv_pipeline *pipeline)
+ const struct radv_pipeline *pipeline)
{
struct radv_shader_variant *shader = pipeline->shaders[MESA_SHADER_COMPUTE];
struct radv_device *device = pipeline->device;
pipeline->user_data_0[MESA_SHADER_COMPUTE] = radv_pipeline_stage_to_user_data_0(pipeline, MESA_SHADER_COMPUTE, device->physical_device->rad_info.chip_class);
pipeline->need_indirect_descriptor_sets |= pipeline->shaders[MESA_SHADER_COMPUTE]->info.need_indirect_descriptor_sets;
- result = radv_pipeline_scratch_init(device, pipeline);
- if (result != VK_SUCCESS) {
- radv_pipeline_destroy(device, pipeline, pAllocator);
- return result;
- }
+ radv_pipeline_init_scratch(device, pipeline);
radv_compute_generate_pm4(pipeline);
/* backend IR */
if (p < end) {
p->isText = true;
- if (pipeline->device->physical_device->use_llvm) {
+ if (radv_use_llvm_for_stage(pipeline->device, stage)) {
desc_copy(p->name, "LLVM IR");
desc_copy(p->description, "The LLVM IR after some optimizations");
} else {