X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Famd%2Fvulkan%2Fradv_pipeline.c;h=33d594f6c48cfd93e15e93f2a90bd2be4affb4f7;hb=2133e6420377f29052a60efd9471f4a946040515;hp=d7a05c03e2a19531035695337e230c22179e1ad7;hpb=067b01c5e61b9fd8654668c6388f9119bb179760;p=mesa.git

diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index d7a05c03e2a..33d594f6c48 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -222,8 +222,8 @@ static uint32_t get_hash_flags(struct radv_device *device)
 	return hash_flags;
 }
 
-static VkResult
-radv_pipeline_scratch_init(struct radv_device *device,
+static void
+radv_pipeline_init_scratch(struct radv_device *device,
 			   struct radv_pipeline *pipeline)
 {
 	unsigned scratch_bytes_per_wave = 0;
@@ -254,7 +254,6 @@ radv_pipeline_scratch_init(struct radv_device *device,
 
 	pipeline->scratch_bytes_per_wave = scratch_bytes_per_wave;
 	pipeline->max_waves = max_waves;
-	return VK_SUCCESS;
 }
 
 static uint32_t si_translate_blend_logic_op(VkLogicOp op)
@@ -1048,6 +1047,17 @@ radv_pipeline_out_of_order_rast(struct radv_pipeline *pipeline,
 	return true;
 }
 
+static const VkConservativeRasterizationModeEXT
+radv_get_conservative_raster_mode(const VkPipelineRasterizationStateCreateInfo *pCreateInfo)
+{
+	const VkPipelineRasterizationConservativeStateCreateInfoEXT *conservative_raster =
+		vk_find_struct_const(pCreateInfo->pNext, PIPELINE_RASTERIZATION_CONSERVATIVE_STATE_CREATE_INFO_EXT);
+
+	if (!conservative_raster)
+		return VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT;
+	return conservative_raster->conservativeRasterizationMode;
+}
+
 static void
 radv_pipeline_init_multisample_state(struct radv_pipeline *pipeline,
 				     struct radv_blend_state *blend,
@@ -1056,6 +1066,8 @@ radv_pipeline_init_multisample_state(struct radv_pipeline *pipeline,
 	const VkPipelineMultisampleStateCreateInfo *vkms = radv_pipeline_get_multisample_state(pCreateInfo);
 	struct radv_multisample_state *ms = &pipeline->graphics.ms;
 	unsigned num_tile_pipes = pipeline->device->physical_device->rad_info.num_tile_pipes;
+	const VkConservativeRasterizationModeEXT mode =
+		radv_get_conservative_raster_mode(pCreateInfo->pRasterizationState);
 	bool out_of_order_rast = false;
 	int ps_iter_samples = 1;
 	uint32_t mask = 0xffff;
@@ -1108,6 +1120,15 @@ radv_pipeline_init_multisample_state(struct radv_pipeline *pipeline,
 		S_028804_INCOHERENT_EQAA_READS(1) |
 		S_028804_INTERPOLATE_COMP_Z(1) |
 		S_028804_STATIC_ANCHOR_ASSOCIATIONS(1);
+
+	/* Adjust MSAA state if conservative rasterization is enabled. */
+	if (mode != VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT) {
+		ms->pa_sc_aa_config |= S_028BE0_AA_MASK_CENTROID_DTMN(1);
+
+		ms->db_eqaa |= S_028804_ENABLE_POSTZ_OVERRASTERIZATION(1) |
+			       S_028804_OVERRASTERIZATION_AMOUNT(4);
+	}
+
 	ms->pa_sc_mode_cntl_1 = S_028A4C_WALK_FENCE_ENABLE(1) | //TODO linear dst fixes
 		S_028A4C_WALK_FENCE_SIZE(num_tile_pipes == 2 ? 2 : 3) |
@@ -1335,6 +1356,87 @@ static uint32_t radv_pipeline_needed_dynamic_state(const VkGraphicsPipelineCreat
 	return states;
 }
 
+static struct radv_ia_multi_vgt_param_helpers
+radv_compute_ia_multi_vgt_param_helpers(struct radv_pipeline *pipeline)
+{
+	struct radv_ia_multi_vgt_param_helpers ia_multi_vgt_param = {0};
+	const struct radv_device *device = pipeline->device;
+
+	if (radv_pipeline_has_tess(pipeline))
+		ia_multi_vgt_param.primgroup_size = pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.tcs.num_patches;
+	else if (radv_pipeline_has_gs(pipeline))
+		ia_multi_vgt_param.primgroup_size = 64;
+	else
+		ia_multi_vgt_param.primgroup_size = 128; /* recommended without a GS */
+
+	/* GS requirement. */
+	ia_multi_vgt_param.partial_es_wave = false;
+	if (radv_pipeline_has_gs(pipeline) && device->physical_device->rad_info.chip_class <= GFX8)
+		if (SI_GS_PER_ES / ia_multi_vgt_param.primgroup_size >= pipeline->device->gs_table_depth - 3)
+			ia_multi_vgt_param.partial_es_wave = true;
+
+	ia_multi_vgt_param.ia_switch_on_eoi = false;
+	if (pipeline->shaders[MESA_SHADER_FRAGMENT]->info.ps.prim_id_input)
+		ia_multi_vgt_param.ia_switch_on_eoi = true;
+	if (radv_pipeline_has_gs(pipeline) &&
+	    pipeline->shaders[MESA_SHADER_GEOMETRY]->info.uses_prim_id)
+		ia_multi_vgt_param.ia_switch_on_eoi = true;
+	if (radv_pipeline_has_tess(pipeline)) {
+		/* SWITCH_ON_EOI must be set if PrimID is used. */
+		if (pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.uses_prim_id ||
+		    radv_get_shader(pipeline, MESA_SHADER_TESS_EVAL)->info.uses_prim_id)
+			ia_multi_vgt_param.ia_switch_on_eoi = true;
+	}
+
+	ia_multi_vgt_param.partial_vs_wave = false;
+	if (radv_pipeline_has_tess(pipeline)) {
+		/* Bug with tessellation and GS on Bonaire and older 2 SE chips. */
+		if ((device->physical_device->rad_info.family == CHIP_TAHITI ||
+		     device->physical_device->rad_info.family == CHIP_PITCAIRN ||
+		     device->physical_device->rad_info.family == CHIP_BONAIRE) &&
+		    radv_pipeline_has_gs(pipeline))
+			ia_multi_vgt_param.partial_vs_wave = true;
+		/* Needed for 028B6C_DISTRIBUTION_MODE != 0 */
+		if (device->physical_device->rad_info.has_distributed_tess) {
+			if (radv_pipeline_has_gs(pipeline)) {
+				if (device->physical_device->rad_info.chip_class <= GFX8)
+					ia_multi_vgt_param.partial_es_wave = true;
+			} else {
+				ia_multi_vgt_param.partial_vs_wave = true;
+			}
+		}
+	}
+
+	if (radv_pipeline_has_gs(pipeline)) {
+		/* On these chips there is the possibility of a hang if the
+		 * pipeline uses a GS and partial_vs_wave is not set.
+		 *
+		 * This mostly does not hit 4-SE chips, as those typically set
+		 * ia_switch_on_eoi and then partial_vs_wave is set for pipelines
+		 * with GS due to another workaround.
+		 *
+		 * Reproducer: https://bugs.freedesktop.org/show_bug.cgi?id=109242
+		 */
+		if (device->physical_device->rad_info.family == CHIP_TONGA ||
+		    device->physical_device->rad_info.family == CHIP_FIJI ||
+		    device->physical_device->rad_info.family == CHIP_POLARIS10 ||
+		    device->physical_device->rad_info.family == CHIP_POLARIS11 ||
+		    device->physical_device->rad_info.family == CHIP_POLARIS12 ||
+		    device->physical_device->rad_info.family == CHIP_VEGAM) {
+			ia_multi_vgt_param.partial_vs_wave = true;
+		}
+	}
+
+	ia_multi_vgt_param.base =
+		S_028AA8_PRIMGROUP_SIZE(ia_multi_vgt_param.primgroup_size - 1) |
+		/* The following field was moved to VGT_SHADER_STAGES_EN in GFX9. */
+		S_028AA8_MAX_PRIMGRP_IN_WAVE(device->physical_device->rad_info.chip_class == GFX8 ? 2 : 0) |
+		S_030960_EN_INST_OPT_BASIC(device->physical_device->rad_info.chip_class >= GFX9) |
+		S_030960_EN_INST_OPT_ADV(device->physical_device->rad_info.chip_class >= GFX9);
+
+	return ia_multi_vgt_param;
+}
+
 static void
 radv_pipeline_init_input_assembly_state(struct radv_pipeline *pipeline,
 					const VkGraphicsPipelineCreateInfo *pCreateInfo,
@@ -1359,6 +1461,9 @@ radv_pipeline_init_input_assembly_state(struct radv_pipeline *pipeline,
 	if (extra && extra->use_rectlist) {
 		pipeline->graphics.can_use_guardband = true;
 	}
+
+	pipeline->graphics.ia_multi_vgt_param =
+		radv_compute_ia_multi_vgt_param_helpers(pipeline);
 }
 
 static void
@@ -1585,6 +1690,60 @@ radv_pipeline_init_dynamic_state(struct radv_pipeline *pipeline,
 	pipeline->dynamic_state.mask = states;
 }
 
+static void
+radv_pipeline_init_raster_state(struct radv_pipeline *pipeline,
+				const VkGraphicsPipelineCreateInfo *pCreateInfo)
+{
+	const VkPipelineRasterizationStateCreateInfo *raster_info =
+		pCreateInfo->pRasterizationState;
+
+	pipeline->graphics.pa_su_sc_mode_cntl =
+		S_028814_FACE(raster_info->frontFace) |
+		S_028814_CULL_FRONT(!!(raster_info->cullMode & VK_CULL_MODE_FRONT_BIT)) |
+		S_028814_CULL_BACK(!!(raster_info->cullMode & VK_CULL_MODE_BACK_BIT)) |
+		S_028814_POLY_MODE(raster_info->polygonMode != VK_POLYGON_MODE_FILL) |
+		S_028814_POLYMODE_FRONT_PTYPE(si_translate_fill(raster_info->polygonMode)) |
+		S_028814_POLYMODE_BACK_PTYPE(si_translate_fill(raster_info->polygonMode)) |
+		S_028814_POLY_OFFSET_FRONT_ENABLE(raster_info->depthBiasEnable ? 1 : 0) |
+		S_028814_POLY_OFFSET_BACK_ENABLE(raster_info->depthBiasEnable ? 1 : 0) |
+		S_028814_POLY_OFFSET_PARA_ENABLE(raster_info->depthBiasEnable ? 1 : 0);
+}
+
+static void
+radv_pipeline_init_depth_stencil_state(struct radv_pipeline *pipeline,
+				       const VkGraphicsPipelineCreateInfo *pCreateInfo)
+{
+	const VkPipelineDepthStencilStateCreateInfo *ds_info
+		= radv_pipeline_get_depth_stencil_state(pCreateInfo);
+	RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
+	struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass;
+	struct radv_render_pass_attachment *attachment = NULL;
+	uint32_t db_depth_control = 0;
+
+	if (subpass->depth_stencil_attachment)
+		attachment = pass->attachments + subpass->depth_stencil_attachment->attachment;
+
+	bool has_depth_attachment = attachment && vk_format_is_depth(attachment->format);
+	bool has_stencil_attachment = attachment && vk_format_is_stencil(attachment->format);
+
+	if (ds_info) {
+		if (has_depth_attachment) {
+			db_depth_control = S_028800_Z_ENABLE(ds_info->depthTestEnable ? 1 : 0) |
+					   S_028800_Z_WRITE_ENABLE(ds_info->depthWriteEnable ? 1 : 0) |
+					   S_028800_ZFUNC(ds_info->depthCompareOp) |
+					   S_028800_DEPTH_BOUNDS_ENABLE(ds_info->depthBoundsTestEnable ? 1 : 0);
+		}
+
+		if (has_stencil_attachment && ds_info->stencilTestEnable) {
+			db_depth_control |= S_028800_STENCIL_ENABLE(1) | S_028800_BACKFACE_ENABLE(1);
+			db_depth_control |= S_028800_STENCILFUNC(ds_info->front.compareOp);
+			db_depth_control |= S_028800_STENCILFUNC_BF(ds_info->back.compareOp);
+		}
+	}
+
+	pipeline->graphics.db_depth_control = db_depth_control;
+}
+
 static void
 gfx9_get_gs_info(const struct radv_pipeline_key *key,
 		 const struct radv_pipeline *pipeline,
@@ -1780,6 +1939,7 @@ gfx10_get_ngg_info(const struct radv_pipeline_key *key,
 	unsigned gsprim_lds_size = 0;
 
 	/* All these are per subgroup: */
+	const unsigned min_esverts = pipeline->device->physical_device->rad_info.chip_class >= GFX10_3 ? 29 : 24;
 	bool max_vert_out_per_gs_instance = false;
 	unsigned max_esverts_base = 256;
 	unsigned max_gsprims_base = 128; /* default prim group size clamp */
@@ -1905,7 +2065,7 @@ gfx10_get_ngg_info(const struct radv_pipeline_key *key,
 	}
 
 	/* Hardware restriction: minimum value of max_esverts */
-	max_esverts = MAX2(max_esverts, 23 + max_verts_per_prim);
+	max_esverts = MAX2(max_esverts, min_esverts - 1 + max_verts_per_prim);
 
 	unsigned max_out_vertices =
 		max_vert_out_per_gs_instance ? gs_info->gs.vertices_out :
@@ -1942,12 +2102,12 @@ gfx10_get_ngg_info(const struct radv_pipeline_key *key,
 
 	pipeline->graphics.esgs_ring_size = ngg->esgs_ring_size;
 
-	assert(ngg->hw_max_esverts >= 24); /* HW limitation */
+	assert(ngg->hw_max_esverts >= min_esverts); /* HW limitation */
 }
 
 static void
-calculate_gs_ring_sizes(struct radv_pipeline *pipeline,
-			const struct gfx9_gs_info *gs)
+radv_pipeline_init_gs_ring_state(struct radv_pipeline *pipeline,
+				 const struct gfx9_gs_info *gs)
 {
 	struct radv_device *device = pipeline->device;
 	unsigned num_se = device->physical_device->rad_info.max_se;
@@ -1983,7 +2143,7 @@ calculate_gs_ring_sizes(struct radv_pipeline *pipeline,
 }
 
 struct radv_shader_variant *
-radv_get_shader(struct radv_pipeline *pipeline,
+radv_get_shader(const struct radv_pipeline *pipeline,
 		gl_shader_stage stage)
 {
 	if (stage == MESA_SHADER_VERTEX) {
@@ -2465,7 +2625,7 @@ radv_fill_shader_info(struct radv_pipeline *pipeline,
 				  pipeline->layout,
 				  &keys[MESA_SHADER_FRAGMENT],
 				  &infos[MESA_SHADER_FRAGMENT],
-				  pipeline->device->physical_device->use_llvm);
+				  radv_use_llvm_for_stage(pipeline->device, MESA_SHADER_FRAGMENT));
 
 	/* TODO: These are no longer used as keys we should refactor this */
 	keys[MESA_SHADER_VERTEX].vs_common_out.export_prim_id =
@@ -2517,7 +2677,7 @@ radv_fill_shader_info(struct radv_pipeline *pipeline,
 			radv_nir_shader_info_pass(combined_nir[i],
 						  pipeline->layout, &key,
 						  &infos[MESA_SHADER_TESS_CTRL],
-						  pipeline->device->physical_device->use_llvm);
+						  radv_use_llvm_for_stage(pipeline->device, MESA_SHADER_TESS_CTRL));
 		}
 
 		keys[MESA_SHADER_TESS_EVAL].tes.num_patches =
@@ -2541,7 +2701,7 @@ radv_fill_shader_info(struct radv_pipeline *pipeline,
 					  pipeline->layout,
 					  &keys[pre_stage],
 					  &infos[MESA_SHADER_GEOMETRY],
-					  pipeline->device->physical_device->use_llvm);
+					  radv_use_llvm_for_stage(pipeline->device, MESA_SHADER_GEOMETRY));
 		}
 
 		filled_stages |= (1 << pre_stage);
@@ -2566,7 +2726,8 @@ radv_fill_shader_info(struct radv_pipeline *pipeline,
 
 		radv_nir_shader_info_init(&infos[i]);
 		radv_nir_shader_info_pass(nir[i], pipeline->layout,
-					  &keys[i], &infos[i], pipeline->device->physical_device->use_llvm);
+					  &keys[i], &infos[i],
+					  radv_use_llvm_for_stage(pipeline->device, i));
 	}
 
 	for (int i = 0; i < MESA_SHADER_STAGES; i++) {
@@ -2776,7 +2937,7 @@ VkResult radv_create_shaders(struct radv_pipeline *pipeline,
 			/* do this again since information such as outputs_read can be out-of-date */
 			nir_shader_gather_info(nir[i], nir_shader_get_entrypoint(nir[i]));
 
-			if (device->physical_device->use_llvm) {
+			if (radv_use_llvm_for_stage(device, i)) {
 				NIR_PASS_V(nir[i], nir_lower_bool_to_int32);
 			} else {
 				NIR_PASS_V(nir[i], nir_lower_non_uniform_access,
@@ -2785,6 +2946,7 @@ VkResult radv_create_shaders(struct radv_pipeline *pipeline,
 					   nir_lower_non_uniform_texture_access |
 					   nir_lower_non_uniform_image_access);
 			}
+			NIR_PASS_V(nir[i], nir_lower_memory_model);
 		}
 	}
 
@@ -2833,7 +2995,8 @@ VkResult radv_create_shaders(struct radv_pipeline *pipeline,
 
 		radv_nir_shader_info_pass(nir[MESA_SHADER_GEOMETRY],
 					  pipeline->layout, &key,
-					  &info, pipeline->device->physical_device->use_llvm);
+					  &info,
+					  radv_use_llvm_for_stage(pipeline->device, MESA_SHADER_GEOMETRY));
 
 		info.wave_size = 64; /* Wave32 not supported. */
 		info.ballot_bit_size = 64;
@@ -3382,9 +3545,8 @@ radv_gfx10_compute_bin_size(struct radv_pipeline *pipeline, const VkGraphicsPipe
 }
 
 static void
-radv_pipeline_generate_disabled_binning_state(struct radeon_cmdbuf *ctx_cs,
-					      struct radv_pipeline *pipeline,
-					      const VkGraphicsPipelineCreateInfo *pCreateInfo)
+radv_pipeline_init_disabled_binning_state(struct radv_pipeline *pipeline,
+					  const VkGraphicsPipelineCreateInfo *pCreateInfo)
 {
 	uint32_t pa_sc_binner_cntl_0 =
 		S_028C44_BINNING_MODE(V_028C44_DISABLE_BINNING_USE_LEGACY_SC) |
@@ -3454,10 +3616,9 @@ radv_get_binning_settings(const struct radv_physical_device *pdev)
 }
 
 static void
-radv_pipeline_generate_binning_state(struct radeon_cmdbuf *ctx_cs,
-				     struct radv_pipeline *pipeline,
-				     const VkGraphicsPipelineCreateInfo *pCreateInfo,
-				     const struct radv_blend_state *blend)
+radv_pipeline_init_binning_state(struct radv_pipeline *pipeline,
+				 const VkGraphicsPipelineCreateInfo *pCreateInfo,
+				 const struct radv_blend_state *blend)
 {
 	if (pipeline->device->physical_device->rad_info.chip_class < GFX9)
 		return;
@@ -3502,13 +3663,13 @@ radv_pipeline_generate_binning_state(struct radeon_cmdbuf *ctx_cs,
 		pipeline->graphics.binning.pa_sc_binner_cntl_0 = pa_sc_binner_cntl_0;
 		pipeline->graphics.binning.db_dfsm_control = db_dfsm_control;
 	} else
-		radv_pipeline_generate_disabled_binning_state(ctx_cs, pipeline, pCreateInfo);
+		radv_pipeline_init_disabled_binning_state(pipeline, pCreateInfo);
 }
 
 
 static void
 radv_pipeline_generate_depth_stencil_state(struct radeon_cmdbuf *ctx_cs,
-					   struct radv_pipeline *pipeline,
+					   const struct radv_pipeline *pipeline,
 					   const VkGraphicsPipelineCreateInfo *pCreateInfo,
 					   const struct radv_graphics_pipeline_create_info *extra)
 {
@@ -3517,7 +3678,6 @@ radv_pipeline_generate_depth_stencil_state(struct radeon_cmdbuf *ctx_cs,
 	struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass;
 	struct radv_shader_variant *ps = pipeline->shaders[MESA_SHADER_FRAGMENT];
 	struct radv_render_pass_attachment *attachment = NULL;
-	uint32_t db_depth_control = 0;
 	uint32_t db_render_control = 0, db_render_override2 = 0;
 	uint32_t db_render_override = 0;
 
@@ -3525,14 +3685,8 @@ radv_pipeline_generate_depth_stencil_state(struct radeon_cmdbuf *ctx_cs,
 		attachment = pass->attachments + subpass->depth_stencil_attachment->attachment;
 
 	bool has_depth_attachment = attachment && vk_format_is_depth(attachment->format);
-	bool has_stencil_attachment = attachment && vk_format_is_stencil(attachment->format);
 
 	if (vkds && has_depth_attachment) {
-		db_depth_control = S_028800_Z_ENABLE(vkds->depthTestEnable ? 1 : 0) |
-				   S_028800_Z_WRITE_ENABLE(vkds->depthWriteEnable ? 1 : 0) |
-				   S_028800_ZFUNC(vkds->depthCompareOp) |
-				   S_028800_DEPTH_BOUNDS_ENABLE(vkds->depthBoundsTestEnable ? 1 : 0);
-
 		/* from amdvlk: For 4xAA and 8xAA need to decompress on flush for better performance */
 		db_render_override2 |= S_028010_DECOMPRESS_Z_ON_FLUSH(attachment->samples > 2);
@@ -3540,13 +3694,6 @@ radv_pipeline_generate_depth_stencil_state(struct radeon_cmdbuf *ctx_cs,
 			db_render_override2 |= S_028010_CENTROID_COMPUTATION_MODE_GFX103(2);
 	}
 
-	if (has_stencil_attachment && vkds && vkds->stencilTestEnable) {
-		db_depth_control |= S_028800_STENCIL_ENABLE(1) | S_028800_BACKFACE_ENABLE(1);
-		db_depth_control |= S_028800_STENCILFUNC(vkds->front.compareOp);
-
-		db_depth_control |= S_028800_STENCILFUNC_BF(vkds->back.compareOp);
-	}
-
 	if (attachment && extra) {
 		db_render_control |= S_028000_DEPTH_CLEAR_ENABLE(extra->db_depth_clear);
 		db_render_control |= S_028000_STENCIL_CLEAR_ENABLE(extra->db_stencil_clear);
@@ -3579,13 +3726,11 @@ radv_pipeline_generate_depth_stencil_state(struct radeon_cmdbuf *ctx_cs,
 	radeon_set_context_reg(ctx_cs, R_028000_DB_RENDER_CONTROL, db_render_control);
 	radeon_set_context_reg(ctx_cs, R_02800C_DB_RENDER_OVERRIDE, db_render_override);
 	radeon_set_context_reg(ctx_cs, R_028010_DB_RENDER_OVERRIDE2, db_render_override2);
-
-	pipeline->graphics.db_depth_control = db_depth_control;
 }
 
 static void
 radv_pipeline_generate_blend_state(struct radeon_cmdbuf *ctx_cs,
-				   struct radv_pipeline *pipeline,
+				   const struct radv_pipeline *pipeline,
 				   const struct radv_blend_state *blend)
 {
 	radeon_set_context_reg_seq(ctx_cs, R_028780_CB_BLEND0_CONTROL, 8);
@@ -3604,25 +3749,11 @@ radv_pipeline_generate_blend_state(struct radeon_cmdbuf *ctx_cs,
 
 	radeon_set_context_reg(ctx_cs, R_028238_CB_TARGET_MASK, blend->cb_target_mask);
 	radeon_set_context_reg(ctx_cs, R_02823C_CB_SHADER_MASK, blend->cb_shader_mask);
-
-	pipeline->graphics.col_format = blend->spi_shader_col_format;
-	pipeline->graphics.cb_target_mask = blend->cb_target_mask;
-}
-
-static const VkConservativeRasterizationModeEXT
-radv_get_conservative_raster_mode(const VkPipelineRasterizationStateCreateInfo *pCreateInfo)
-{
-	const VkPipelineRasterizationConservativeStateCreateInfoEXT *conservative_raster =
-		vk_find_struct_const(pCreateInfo->pNext, PIPELINE_RASTERIZATION_CONSERVATIVE_STATE_CREATE_INFO_EXT);
-
-	if (!conservative_raster)
-		return VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT;
-	return conservative_raster->conservativeRasterizationMode;
 }
 
 static void
 radv_pipeline_generate_raster_state(struct radeon_cmdbuf *ctx_cs,
-				    struct radv_pipeline *pipeline,
+				    const struct radv_pipeline *pipeline,
 				    const VkGraphicsPipelineCreateInfo *pCreateInfo)
 {
 	const VkPipelineRasterizationStateCreateInfo *vkraster = pCreateInfo->pRasterizationState;
@@ -3644,28 +3775,11 @@ radv_pipeline_generate_raster_state(struct radeon_cmdbuf *ctx_cs,
 			       S_028810_DX_RASTERIZATION_KILL(vkraster->rasterizerDiscardEnable ? 1 : 0) |
 			       S_028810_DX_LINEAR_ATTR_CLIP_ENA(1));
 
-	pipeline->graphics.pa_su_sc_mode_cntl =
-		S_028814_FACE(vkraster->frontFace) |
-		S_028814_CULL_FRONT(!!(vkraster->cullMode & VK_CULL_MODE_FRONT_BIT)) |
-		S_028814_CULL_BACK(!!(vkraster->cullMode & VK_CULL_MODE_BACK_BIT)) |
-		S_028814_POLY_MODE(vkraster->polygonMode != VK_POLYGON_MODE_FILL) |
-		S_028814_POLYMODE_FRONT_PTYPE(si_translate_fill(vkraster->polygonMode)) |
-		S_028814_POLYMODE_BACK_PTYPE(si_translate_fill(vkraster->polygonMode)) |
-		S_028814_POLY_OFFSET_FRONT_ENABLE(vkraster->depthBiasEnable ? 1 : 0) |
-		S_028814_POLY_OFFSET_BACK_ENABLE(vkraster->depthBiasEnable ? 1 : 0) |
-		S_028814_POLY_OFFSET_PARA_ENABLE(vkraster->depthBiasEnable ? 1 : 0);
-
 	radeon_set_context_reg(ctx_cs, R_028BDC_PA_SC_LINE_CNTL,
 			       S_028BDC_DX10_DIAMOND_TEST_ENA(1));
 
 	/* Conservative rasterization. */
 	if (mode != VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT) {
-		struct radv_multisample_state *ms = &pipeline->graphics.ms;
-
-		ms->pa_sc_aa_config |= S_028BE0_AA_MASK_CENTROID_DTMN(1);
-		ms->db_eqaa |= S_028804_ENABLE_POSTZ_OVERRASTERIZATION(1) |
-			       S_028804_OVERRASTERIZATION_AMOUNT(4);
-
 		pa_sc_conservative_rast = S_028C4C_PREZ_AA_MASK_ENABLE(1) |
 					  S_028C4C_POSTZ_AA_MASK_ENABLE(1) |
 					  S_028C4C_CENTROID_SAMPLE_OVERRIDE(1);
@@ -3695,9 +3809,9 @@ static void
 radv_pipeline_generate_multisample_state(struct radeon_cmdbuf *ctx_cs,
-					 struct radv_pipeline *pipeline)
+					 const struct radv_pipeline *pipeline)
 {
-	struct radv_multisample_state *ms = &pipeline->graphics.ms;
+	const struct radv_multisample_state *ms = &pipeline->graphics.ms;
 
 	radeon_set_context_reg_seq(ctx_cs, R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 2);
 	radeon_emit(ctx_cs, ms->pa_sc_aa_mask[0]);
@@ -3726,7 +3840,7 @@ static void
 radv_pipeline_generate_vgt_gs_mode(struct radeon_cmdbuf *ctx_cs,
-				   struct radv_pipeline *pipeline)
+				   const struct radv_pipeline *pipeline)
 {
 	const struct radv_vs_output_info *outinfo = get_vs_output_info(pipeline);
 	const struct radv_shader_variant *vs =
@@ -3757,8 +3871,8 @@ radv_pipeline_generate_vgt_gs_mode(struct radeon_cmdbuf *ctx_cs,
 
 static void
 radv_pipeline_generate_hw_vs(struct radeon_cmdbuf *ctx_cs,
 			     struct radeon_cmdbuf *cs,
-			     struct radv_pipeline *pipeline,
-			     struct radv_shader_variant *shader)
+			     const struct radv_pipeline *pipeline,
+			     const struct radv_shader_variant *shader)
 {
 	uint64_t va = radv_buffer_get_va(shader->bo) + shader->bo_offset;
@@ -3809,6 +3923,7 @@ radv_pipeline_generate_hw_vs(struct radeon_cmdbuf *ctx_cs,
 			       S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0f) != 0) |
 			       S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xf0) != 0) |
 			       S_02881C_BYPASS_PRIM_RATE_COMBINER_GFX103(pipeline->device->physical_device->rad_info.chip_class >= GFX10_3) |
+			       S_02881C_BYPASS_VTX_RATE_COMBINER_GFX103(pipeline->device->physical_device->rad_info.chip_class >= GFX10_3) |
 			       cull_dist_mask << 8 |
 			       clip_dist_mask);
 
@@ -3819,8 +3934,8 @@ radv_pipeline_generate_hw_vs(struct radeon_cmdbuf *ctx_cs,
 
 static void
 radv_pipeline_generate_hw_es(struct radeon_cmdbuf *cs,
-			     struct radv_pipeline *pipeline,
-			     struct radv_shader_variant *shader)
+			     const struct radv_pipeline *pipeline,
+			     const struct radv_shader_variant *shader)
 {
 	uint64_t va = radv_buffer_get_va(shader->bo) + shader->bo_offset;
@@ -3833,8 +3948,8 @@ radv_pipeline_generate_hw_es(struct radeon_cmdbuf *cs,
 
 static void
 radv_pipeline_generate_hw_ls(struct radeon_cmdbuf *cs,
-			     struct radv_pipeline *pipeline,
-			     struct radv_shader_variant *shader)
+			     const struct radv_pipeline *pipeline,
+			     const struct radv_shader_variant *shader)
 {
 	unsigned num_lds_blocks = pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.tcs.num_lds_blocks;
 	uint64_t va = radv_buffer_get_va(shader->bo) + shader->bo_offset;
@@ -3857,8 +3972,8 @@ radv_pipeline_generate_hw_ls(struct radeon_cmdbuf *cs,
 
 static void
 radv_pipeline_generate_hw_ngg(struct radeon_cmdbuf *ctx_cs,
 			      struct radeon_cmdbuf *cs,
-			      struct radv_pipeline *pipeline,
-			      struct radv_shader_variant *shader)
+			      const struct radv_pipeline *pipeline,
+			      const struct radv_shader_variant *shader)
 {
 	uint64_t va = radv_buffer_get_va(shader->bo) + shader->bo_offset;
 	gl_shader_stage es_type =
@@ -3924,6 +4039,7 @@ radv_pipeline_generate_hw_ngg(struct radeon_cmdbuf *ctx_cs,
 			       S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0f) != 0) |
 			       S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xf0) != 0) |
 			       S_02881C_BYPASS_PRIM_RATE_COMBINER_GFX103(pipeline->device->physical_device->rad_info.chip_class >= GFX10_3) |
+			       S_02881C_BYPASS_VTX_RATE_COMBINER_GFX103(pipeline->device->physical_device->rad_info.chip_class >= GFX10_3) |
 			       cull_dist_mask << 8 |
 			       clip_dist_mask);
 
@@ -3990,8 +4106,8 @@ radv_pipeline_generate_hw_ngg(struct radeon_cmdbuf *ctx_cs,
 
 static void
 radv_pipeline_generate_hw_hs(struct radeon_cmdbuf *cs,
-			     struct radv_pipeline *pipeline,
-			     struct radv_shader_variant *shader)
+			     const struct radv_pipeline *pipeline,
+			     const struct radv_shader_variant *shader)
 {
 	uint64_t va = radv_buffer_get_va(shader->bo) + shader->bo_offset;
 
@@ -4021,7 +4137,7 @@ radv_pipeline_generate_hw_hs(struct radeon_cmdbuf *cs,
 static void
 radv_pipeline_generate_vertex_shader(struct radeon_cmdbuf *ctx_cs,
 				     struct radeon_cmdbuf *cs,
-				     struct radv_pipeline *pipeline)
+				     const struct radv_pipeline *pipeline)
 {
 	struct radv_shader_variant *vs;
 
@@ -4043,7 +4159,7 @@ radv_pipeline_generate_vertex_shader(struct radeon_cmdbuf *ctx_cs,
 static void
 radv_pipeline_generate_tess_shaders(struct radeon_cmdbuf *ctx_cs,
 				    struct radeon_cmdbuf *cs,
-				    struct radv_pipeline *pipeline)
+				    const struct radv_pipeline *pipeline)
 {
 	struct radv_shader_variant *tes, *tcs;
 
@@ -4072,7 +4188,7 @@ radv_pipeline_generate_tess_shaders(struct radeon_cmdbuf *ctx_cs,
 
 static void
 radv_pipeline_generate_tess_state(struct radeon_cmdbuf *ctx_cs,
-				  struct radv_pipeline *pipeline,
+				  const struct radv_pipeline *pipeline,
 				  const VkGraphicsPipelineCreateInfo *pCreateInfo)
 {
 	struct radv_shader_variant *tes = radv_get_shader(pipeline, MESA_SHADER_TESS_EVAL);
@@ -4158,12 +4274,12 @@ radv_pipeline_generate_tess_state(struct radeon_cmdbuf *ctx_cs,
 static void
 radv_pipeline_generate_hw_gs(struct radeon_cmdbuf *ctx_cs,
 			     struct radeon_cmdbuf *cs,
-			     struct radv_pipeline *pipeline,
-			     struct radv_shader_variant *gs)
+			     const struct radv_pipeline *pipeline,
+			     const struct radv_shader_variant *gs)
 {
 	const struct gfx9_gs_info *gs_state = &gs->info.gs_ring_info;
 	unsigned gs_max_out_vertices;
-	uint8_t *num_components;
+	const uint8_t *num_components;
 	uint8_t max_stream;
 	unsigned offset;
 	uint64_t va;
@@ -4233,7 +4349,7 @@ radv_pipeline_generate_hw_gs(struct radeon_cmdbuf *ctx_cs,
 static void
 radv_pipeline_generate_geometry_shader(struct radeon_cmdbuf *ctx_cs,
 				       struct radeon_cmdbuf *cs,
-				       struct radv_pipeline *pipeline)
+				       const struct radv_pipeline *pipeline)
 {
 	struct radv_shader_variant *gs;
 
@@ -4281,7 +4397,7 @@ static uint32_t offset_to_ps_input(uint32_t offset, bool flat_shade,
 
 static void
 radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf *ctx_cs,
-				 struct radv_pipeline *pipeline)
+				 const struct radv_pipeline *pipeline)
 {
 	struct radv_shader_variant *ps = pipeline->shaders[MESA_SHADER_FRAGMENT];
 	const struct radv_vs_output_info *outinfo = get_vs_output_info(pipeline);
@@ -4458,7 +4574,7 @@ radv_pipeline_generate_fragment_shader(struct radeon_cmdbuf *ctx_cs,
 
 static void
 radv_pipeline_generate_vgt_vertex_reuse(struct radeon_cmdbuf *ctx_cs,
-					struct radv_pipeline *pipeline)
+					const struct radv_pipeline *pipeline)
 {
 	if (pipeline->device->physical_device->rad_info.family < CHIP_POLARIS10 ||
 	    pipeline->device->physical_device->rad_info.chip_class >= GFX10)
@@ -4604,7 +4720,7 @@ gfx10_pipeline_generate_ge_cntl(struct radeon_cmdbuf *ctx_cs,
 
 static void
 radv_pipeline_generate_vgt_gs_out(struct radeon_cmdbuf *ctx_cs,
-				  struct radv_pipeline *pipeline,
+				  const struct radv_pipeline *pipeline,
 				  const VkGraphicsPipelineCreateInfo *pCreateInfo,
 				  const struct radv_graphics_pipeline_create_info *extra)
 {
@@ -4661,7 +4777,6 @@ radv_pipeline_generate_pm4(struct radv_pipeline *pipeline,
 	radv_pipeline_generate_fragment_shader(ctx_cs, cs, pipeline);
 	radv_pipeline_generate_ps_inputs(ctx_cs, pipeline);
 	radv_pipeline_generate_vgt_vertex_reuse(ctx_cs, pipeline);
-	radv_pipeline_generate_binning_state(ctx_cs, pipeline, pCreateInfo, blend);
 	radv_pipeline_generate_vgt_shader_config(ctx_cs, pipeline);
 	radv_pipeline_generate_cliprect_rule(ctx_cs, pCreateInfo);
 	radv_pipeline_generate_vgt_gs_out(ctx_cs, pipeline, pCreateInfo, extra);
@@ -4675,91 +4790,9 @@ radv_pipeline_generate_pm4(struct radv_pipeline *pipeline,
 	assert(cs->cdw <= cs->max_dw);
 }
 
-static struct radv_ia_multi_vgt_param_helpers
-radv_compute_ia_multi_vgt_param_helpers(struct radv_pipeline *pipeline)
-{
-	struct radv_ia_multi_vgt_param_helpers ia_multi_vgt_param = {0};
-	const struct radv_device *device = pipeline->device;
-
-	if (radv_pipeline_has_tess(pipeline))
-		ia_multi_vgt_param.primgroup_size = pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.tcs.num_patches;
-	else if (radv_pipeline_has_gs(pipeline))
-		ia_multi_vgt_param.primgroup_size = 64;
-	else
-		ia_multi_vgt_param.primgroup_size = 128; /* recommended without a GS */
-
-	/* GS requirement. */
-	ia_multi_vgt_param.partial_es_wave = false;
-	if (radv_pipeline_has_gs(pipeline) && device->physical_device->rad_info.chip_class <= GFX8)
-		if (SI_GS_PER_ES / ia_multi_vgt_param.primgroup_size >= pipeline->device->gs_table_depth - 3)
-			ia_multi_vgt_param.partial_es_wave = true;
-
-	ia_multi_vgt_param.ia_switch_on_eoi = false;
-	if (pipeline->shaders[MESA_SHADER_FRAGMENT]->info.ps.prim_id_input)
-		ia_multi_vgt_param.ia_switch_on_eoi = true;
-	if (radv_pipeline_has_gs(pipeline) &&
-	    pipeline->shaders[MESA_SHADER_GEOMETRY]->info.uses_prim_id)
-		ia_multi_vgt_param.ia_switch_on_eoi = true;
-	if (radv_pipeline_has_tess(pipeline)) {
-		/* SWITCH_ON_EOI must be set if PrimID is used. */
-		if (pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.uses_prim_id ||
-		    radv_get_shader(pipeline, MESA_SHADER_TESS_EVAL)->info.uses_prim_id)
-			ia_multi_vgt_param.ia_switch_on_eoi = true;
-	}
-
-	ia_multi_vgt_param.partial_vs_wave = false;
-	if (radv_pipeline_has_tess(pipeline)) {
-		/* Bug with tessellation and GS on Bonaire and older 2 SE chips. */
-		if ((device->physical_device->rad_info.family == CHIP_TAHITI ||
-		     device->physical_device->rad_info.family == CHIP_PITCAIRN ||
-		     device->physical_device->rad_info.family == CHIP_BONAIRE) &&
-		    radv_pipeline_has_gs(pipeline))
-			ia_multi_vgt_param.partial_vs_wave = true;
-		/* Needed for 028B6C_DISTRIBUTION_MODE != 0 */
-		if (device->physical_device->rad_info.has_distributed_tess) {
-			if (radv_pipeline_has_gs(pipeline)) {
-				if (device->physical_device->rad_info.chip_class <= GFX8)
-					ia_multi_vgt_param.partial_es_wave = true;
-			} else {
-				ia_multi_vgt_param.partial_vs_wave = true;
-			}
-		}
-	}
-
-	if (radv_pipeline_has_gs(pipeline)) {
-		/* On these chips there is the possibility of a hang if the
-		 * pipeline uses a GS and partial_vs_wave is not set.
-		 *
-		 * This mostly does not hit 4-SE chips, as those typically set
-		 * ia_switch_on_eoi and then partial_vs_wave is set for pipelines
-		 * with GS due to another workaround.
-		 *
-		 * Reproducer: https://bugs.freedesktop.org/show_bug.cgi?id=109242
-		 */
-		if (device->physical_device->rad_info.family == CHIP_TONGA ||
-		    device->physical_device->rad_info.family == CHIP_FIJI ||
-		    device->physical_device->rad_info.family == CHIP_POLARIS10 ||
-		    device->physical_device->rad_info.family == CHIP_POLARIS11 ||
-		    device->physical_device->rad_info.family == CHIP_POLARIS12 ||
-		    device->physical_device->rad_info.family == CHIP_VEGAM) {
-			ia_multi_vgt_param.partial_vs_wave = true;
-		}
-	}
-
-	ia_multi_vgt_param.base =
-		S_028AA8_PRIMGROUP_SIZE(ia_multi_vgt_param.primgroup_size - 1) |
-		/* The following field was moved to VGT_SHADER_STAGES_EN in GFX9. */
-		S_028AA8_MAX_PRIMGRP_IN_WAVE(device->physical_device->rad_info.chip_class == GFX8 ? 2 : 0) |
-		S_030960_EN_INST_OPT_BASIC(device->physical_device->rad_info.chip_class >= GFX9) |
-		S_030960_EN_INST_OPT_ADV(device->physical_device->rad_info.chip_class >= GFX9);
-
-	return ia_multi_vgt_param;
-}
-
-
 static void
-radv_compute_vertex_input_state(struct radv_pipeline *pipeline,
-				const VkGraphicsPipelineCreateInfo *pCreateInfo)
+radv_pipeline_init_vertex_input_state(struct radv_pipeline *pipeline,
+				      const VkGraphicsPipelineCreateInfo *pCreateInfo)
 {
 	const VkPipelineVertexInputStateCreateInfo *vi_info =
 		pCreateInfo->pVertexInputState;
@@ -4790,6 +4823,33 @@ radv_pipeline_get_streamout_shader(struct radv_pipeline *pipeline)
 	return NULL;
 }
 
+static void
+radv_pipeline_init_shader_stages_state(struct radv_pipeline *pipeline)
+{
+	struct radv_device *device = pipeline->device;
+
+	for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
+		pipeline->user_data_0[i] =
+			radv_pipeline_stage_to_user_data_0(pipeline, i,
+							   device->physical_device->rad_info.chip_class);
+
+		if (pipeline->shaders[i]) {
+			pipeline->need_indirect_descriptor_sets |= pipeline->shaders[i]->info.need_indirect_descriptor_sets;
+		}
+	}
+
+	struct radv_userdata_info *loc = radv_lookup_user_sgpr(pipeline, MESA_SHADER_VERTEX,
+							       AC_UD_VS_BASE_VERTEX_START_INSTANCE);
+	if (loc->sgpr_idx != -1) {
+		pipeline->graphics.vtx_base_sgpr = pipeline->user_data_0[MESA_SHADER_VERTEX];
+		pipeline->graphics.vtx_base_sgpr += loc->sgpr_idx * 4;
+		if (radv_get_shader(pipeline, MESA_SHADER_VERTEX)->info.vs.needs_draw_id)
+			pipeline->graphics.vtx_emit_num = 3;
+		else
+			pipeline->graphics.vtx_emit_num = 2;
+	}
+}
+
 static VkResult
 radv_pipeline_init(struct radv_pipeline *pipeline,
 		   struct radv_device *device,
@@ -4832,6 +4892,8 @@ radv_pipeline_init(struct radv_pipeline *pipeline,
 	radv_pipeline_init_multisample_state(pipeline, &blend, pCreateInfo);
 	radv_pipeline_init_input_assembly_state(pipeline, pCreateInfo, extra);
 	radv_pipeline_init_dynamic_state(pipeline, pCreateInfo, extra);
+	radv_pipeline_init_raster_state(pipeline, pCreateInfo);
+	radv_pipeline_init_depth_stencil_state(pipeline, pCreateInfo);
 
 	/* Ensure that some export memory is always allocated, for two reasons:
 	 *
@@ -4871,17 +4933,14 @@ radv_pipeline_init(struct radv_pipeline *pipeline,
 		blend.cb_shader_mask = 0xf;
 	}
 
-	for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
-		if (pipeline->shaders[i]) {
-			pipeline->need_indirect_descriptor_sets |= pipeline->shaders[i]->info.need_indirect_descriptor_sets;
-		}
-	}
+	pipeline->graphics.col_format = blend.spi_shader_col_format;
+	pipeline->graphics.cb_target_mask = blend.cb_target_mask;
 
 	if (radv_pipeline_has_gs(pipeline) && !radv_pipeline_has_ngg(pipeline)) {
 		struct radv_shader_variant *gs = pipeline->shaders[MESA_SHADER_GEOMETRY];
 
-		calculate_gs_ring_sizes(pipeline, &gs->info.gs_ring_info);
+		radv_pipeline_init_gs_ring_state(pipeline, &gs->info.gs_ring_info);
 	}
 
 	if (radv_pipeline_has_tess(pipeline)) {
@@ -4889,28 +4948,14 @@ radv_pipeline_init(struct radv_pipeline *pipeline,
 			pCreateInfo->pTessellationState->patchControlPoints;
 	}
 
-	pipeline->graphics.ia_multi_vgt_param = radv_compute_ia_multi_vgt_param_helpers(pipeline);
-
-	radv_compute_vertex_input_state(pipeline, pCreateInfo);
-
-	for (uint32_t i = 0; i < MESA_SHADER_STAGES; i++)
-		pipeline->user_data_0[i] = radv_pipeline_stage_to_user_data_0(pipeline, i, device->physical_device->rad_info.chip_class);
-
-	struct radv_userdata_info *loc = radv_lookup_user_sgpr(pipeline, MESA_SHADER_VERTEX,
-							       AC_UD_VS_BASE_VERTEX_START_INSTANCE);
-	if (loc->sgpr_idx != -1) {
-		pipeline->graphics.vtx_base_sgpr = pipeline->user_data_0[MESA_SHADER_VERTEX];
-		pipeline->graphics.vtx_base_sgpr += loc->sgpr_idx * 4;
-		if (radv_get_shader(pipeline, MESA_SHADER_VERTEX)->info.vs.needs_draw_id)
-			pipeline->graphics.vtx_emit_num = 3;
-		else
-			pipeline->graphics.vtx_emit_num = 2;
-	}
+	radv_pipeline_init_vertex_input_state(pipeline, pCreateInfo);
+	radv_pipeline_init_binning_state(pipeline, pCreateInfo, &blend);
+	radv_pipeline_init_shader_stages_state(pipeline);
+	radv_pipeline_init_scratch(device, pipeline);
 
 	/* Find the last vertex shader stage that eventually uses streamout. */
 	pipeline->streamout_shader = radv_pipeline_get_streamout_shader(pipeline);
 
-	result = radv_pipeline_scratch_init(device, pipeline);
 	radv_pipeline_generate_pm4(pipeline, pCreateInfo, extra, &blend);
 
 	return result;
@@ -4984,7 +5029,7 @@ VkResult radv_CreateGraphicsPipelines(
 
 static void
 radv_pipeline_generate_hw_cs(struct radeon_cmdbuf *cs,
-			     struct radv_pipeline *pipeline)
+			     const struct radv_pipeline *pipeline)
 {
 	struct radv_shader_variant *shader = pipeline->shaders[MESA_SHADER_COMPUTE];
 	uint64_t va = radv_buffer_get_va(shader->bo) + shader->bo_offset;
@@ -5004,7 +5049,7 @@ radv_pipeline_generate_hw_cs(struct radeon_cmdbuf *cs,
 
 static void
 radv_pipeline_generate_compute_state(struct radeon_cmdbuf *cs,
-				     struct radv_pipeline *pipeline)
+				     const struct radv_pipeline *pipeline)
 {
 	struct radv_shader_variant *shader = pipeline->shaders[MESA_SHADER_COMPUTE];
 	struct radv_device *device = pipeline->device;
@@ -5124,11 +5169,7 @@ static VkResult radv_compute_pipeline_create(
 	pipeline->user_data_0[MESA_SHADER_COMPUTE] = radv_pipeline_stage_to_user_data_0(pipeline, MESA_SHADER_COMPUTE, device->physical_device->rad_info.chip_class);
 	pipeline->need_indirect_descriptor_sets |= pipeline->shaders[MESA_SHADER_COMPUTE]->info.need_indirect_descriptor_sets;
 
-	result = radv_pipeline_scratch_init(device, pipeline);
-	if (result != VK_SUCCESS) {
-		radv_pipeline_destroy(device, pipeline, pAllocator);
-		return result;
-	}
+	radv_pipeline_init_scratch(device, pipeline);
 
 	radv_compute_generate_pm4(pipeline);
 
@@ -5474,7 +5515,7 @@ VkResult radv_GetPipelineExecutableInternalRepresentationsKHR(
 	/* backend IR */
 	if (p < end) {
 		p->isText = true;
-		if (pipeline->device->physical_device->use_llvm) {
+		if (radv_use_llvm_for_stage(pipeline->device, stage)) {
 			desc_copy(p->name, "LLVM IR");
 			desc_copy(p->description, "The LLVM IR after some optimizations");
 		} else {