X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Famd%2Fvulkan%2Fradv_pipeline.c;h=80d218716ea56db13134be8788685a4be268f28d;hb=aed9618e20a8314185b7d305b2309a63a3870c66;hp=a4cc840027a09e0c60d0e0c4bcd25a2f8120697f;hpb=c65015f83c3bad4c140a013bac2c9eb59abec907;p=mesa.git diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index a4cc840027a..80d218716ea 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -38,9 +38,6 @@ #include "spirv/nir_spirv.h" #include "vk_util.h" -#include -#include - #include "sid.h" #include "ac_binary.h" #include "ac_llvm_util.h" @@ -49,7 +46,6 @@ #include "util/debug.h" #include "ac_exp_param.h" #include "ac_shader_util.h" -#include "main/menums.h" struct radv_blend_state { uint32_t blend_enable_4bit; @@ -149,6 +145,22 @@ bool radv_pipeline_has_ngg(const struct radv_pipeline *pipeline) return variant->info.is_ngg; } +bool radv_pipeline_has_ngg_passthrough(const struct radv_pipeline *pipeline) +{ + assert(radv_pipeline_has_ngg(pipeline)); + + struct radv_shader_variant *variant = NULL; + if (pipeline->shaders[MESA_SHADER_GEOMETRY]) + variant = pipeline->shaders[MESA_SHADER_GEOMETRY]; + else if (pipeline->shaders[MESA_SHADER_TESS_EVAL]) + variant = pipeline->shaders[MESA_SHADER_TESS_EVAL]; + else if (pipeline->shaders[MESA_SHADER_VERTEX]) + variant = pipeline->shaders[MESA_SHADER_VERTEX]; + else + return false; + return variant->info.is_ngg_passthrough; +} + bool radv_pipeline_has_gs_copy_shader(const struct radv_pipeline *pipeline) { if (!radv_pipeline_has_gs(pipeline)) @@ -179,7 +191,9 @@ radv_pipeline_destroy(struct radv_device *device, if(pipeline->cs.buf) free(pipeline->cs.buf); - vk_free2(&device->alloc, allocator, pipeline); + + vk_object_base_finish(&pipeline->base); + vk_free2(&device->vk.alloc, allocator, pipeline); } void radv_DestroyPipeline( @@ -202,8 +216,6 @@ static uint32_t get_hash_flags(struct radv_device *device) if (device->instance->debug_flags & RADV_DEBUG_NO_NGG) hash_flags |= RADV_HASH_SHADER_NO_NGG; - if (device->instance->perftest_flags & RADV_PERFTEST_SISCHED) - hash_flags |= RADV_HASH_SHADER_SISCHED; if (device->physical_device->cs_wave_size == 32) hash_flags |= RADV_HASH_SHADER_CS_WAVE32; if (device->physical_device->ps_wave_size == 32) @@ -978,7 +990,7 @@ static uint8_t radv_pipeline_get_ps_iter_samples(const VkGraphicsPipelineCreateI } if (vkms->sampleShadingEnable) { - ps_iter_samples = ceil(vkms->minSampleShading * num_samples); + ps_iter_samples = ceilf(vkms->minSampleShading * num_samples); ps_iter_samples = util_next_power_of_two(ps_iter_samples); } return ps_iter_samples; @@ -1226,6 +1238,23 @@ radv_pipeline_init_multisample_state(struct radv_pipeline *pipeline, ms->pa_sc_mode_cntl_0 = S_028A48_ALTERNATE_RBS_PER_TILE(pipeline->device->physical_device->rad_info.chip_class >= GFX9) | S_028A48_VPORT_SCISSOR_ENABLE(1); + const VkPipelineRasterizationLineStateCreateInfoEXT *rast_line = + vk_find_struct_const(pCreateInfo->pRasterizationState->pNext, + PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT); + if (rast_line) { + ms->pa_sc_mode_cntl_0 |= S_028A48_LINE_STIPPLE_ENABLE(rast_line->stippledLineEnable); + if (rast_line->lineRasterizationMode == VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT) { + /* From the Vulkan spec 1.1.129: + * + * "When VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT lines + * are being rasterized, sample locations may all be + * treated as being at the pixel center (this may + * affect attribute and depth interpolation)." + */ + ms->num_samples = 1; + } + } + if (ms->num_samples > 1) { RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass); struct radv_subpass *subpass = &pass->subpasses[pCreateInfo->subpass]; @@ -1381,6 +1410,8 @@ static unsigned radv_dynamic_state_mask(VkDynamicState state) return RADV_DYNAMIC_DISCARD_RECTANGLE; case VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT: return RADV_DYNAMIC_SAMPLE_LOCATIONS; + case VK_DYNAMIC_STATE_LINE_STIPPLE_EXT: + return RADV_DYNAMIC_LINE_STIPPLE; default: unreachable("Unhandled dynamic state"); } @@ -1416,6 +1447,11 @@ static uint32_t radv_pipeline_needed_dynamic_state(const VkGraphicsPipelineCreat PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT)) states &= ~RADV_DYNAMIC_SAMPLE_LOCATIONS; + if (!pCreateInfo->pRasterizationState || + !vk_find_struct_const(pCreateInfo->pRasterizationState->pNext, + PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT)) + states &= ~RADV_DYNAMIC_LINE_STIPPLE; + /* TODO: blend constants & line width. */ return states; @@ -1568,6 +1604,14 @@ radv_pipeline_init_dynamic_state(struct radv_pipeline *pipeline, } } + const VkPipelineRasterizationLineStateCreateInfoEXT *rast_line_info = + vk_find_struct_const(pCreateInfo->pRasterizationState->pNext, + PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT); + if (needed_states & RADV_DYNAMIC_LINE_STIPPLE) { + dynamic->line_stipple.factor = rast_line_info->lineStippleFactor; + dynamic->line_stipple.pattern = rast_line_info->lineStipplePattern; + } + pipeline->dynamic_state.mask = states; } @@ -2212,6 +2256,54 @@ radv_link_shaders(struct radv_pipeline *pipeline, nir_shader **shaders) } } +static void +radv_set_linked_driver_locations(struct radv_pipeline *pipeline, nir_shader **shaders, + struct radv_shader_info infos[MESA_SHADER_STAGES]) +{ + bool has_tess = shaders[MESA_SHADER_TESS_CTRL]; + bool has_gs = shaders[MESA_SHADER_GEOMETRY]; + + if (!has_tess && !has_gs) + return; + + unsigned vs_info_idx = MESA_SHADER_VERTEX; + unsigned tes_info_idx = MESA_SHADER_TESS_EVAL; + + if (pipeline->device->physical_device->rad_info.chip_class >= GFX9) { + /* These are merged into the next stage */ + vs_info_idx = has_tess ? MESA_SHADER_TESS_CTRL : MESA_SHADER_GEOMETRY; + tes_info_idx = has_gs ? MESA_SHADER_GEOMETRY : MESA_SHADER_TESS_EVAL; + } + + if (has_tess) { + nir_linked_io_var_info vs2tcs = + nir_assign_linked_io_var_locations(shaders[MESA_SHADER_VERTEX], shaders[MESA_SHADER_TESS_CTRL]); + nir_linked_io_var_info tcs2tes = + nir_assign_linked_io_var_locations(shaders[MESA_SHADER_TESS_CTRL], shaders[MESA_SHADER_TESS_EVAL]); + + infos[vs_info_idx].vs.num_linked_outputs = vs2tcs.num_linked_io_vars; + infos[MESA_SHADER_TESS_CTRL].tcs.num_linked_inputs = vs2tcs.num_linked_io_vars; + infos[MESA_SHADER_TESS_CTRL].tcs.num_linked_outputs = tcs2tes.num_linked_io_vars; + infos[MESA_SHADER_TESS_CTRL].tcs.num_linked_patch_outputs = tcs2tes.num_linked_patch_io_vars; + infos[tes_info_idx].tes.num_linked_inputs = tcs2tes.num_linked_io_vars; + infos[tes_info_idx].tes.num_linked_patch_inputs = tcs2tes.num_linked_patch_io_vars; + + if (has_gs) { + nir_linked_io_var_info tes2gs = + nir_assign_linked_io_var_locations(shaders[MESA_SHADER_TESS_EVAL], shaders[MESA_SHADER_GEOMETRY]); + + infos[tes_info_idx].tes.num_linked_outputs = tes2gs.num_linked_io_vars; + infos[MESA_SHADER_GEOMETRY].gs.num_linked_inputs = tes2gs.num_linked_io_vars; + } + } else if (has_gs) { + nir_linked_io_var_info vs2gs = + nir_assign_linked_io_var_locations(shaders[MESA_SHADER_VERTEX], shaders[MESA_SHADER_GEOMETRY]); + + infos[vs_info_idx].vs.num_linked_outputs = vs2gs.num_linked_io_vars; + infos[MESA_SHADER_GEOMETRY].gs.num_linked_inputs = vs2gs.num_linked_io_vars; + } +} + static uint32_t radv_get_attrib_stride(const VkPipelineVertexInputStateCreateInfo *input_state, uint32_t attrib_binding) @@ -2419,35 +2511,44 @@ radv_fill_shader_keys(struct radv_device *device, keys[MESA_SHADER_TESS_EVAL].vs_common_out.as_ngg = false; } - /* - * Disable NGG with geometry shaders. There are a bunch of - * issues still: - * * GS primitives in pipeline statistic queries do not get - * updates. See dEQP-VK.query_pool.statistics_query.geometry_shader_primitives - * - * Furthermore, XGL/AMDVLK also disables this as of 9b632ef. - */ - if (nir[MESA_SHADER_GEOMETRY]) { + if (!device->physical_device->use_ngg_gs) { + if (nir[MESA_SHADER_GEOMETRY]) { + if (nir[MESA_SHADER_TESS_CTRL]) + keys[MESA_SHADER_TESS_EVAL].vs_common_out.as_ngg = false; + else + keys[MESA_SHADER_VERTEX].vs_common_out.as_ngg = false; + } + } + + gl_shader_stage last_xfb_stage = MESA_SHADER_VERTEX; + + for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) { + if (nir[i]) + last_xfb_stage = i; + } + + bool uses_xfb = nir[last_xfb_stage] && + radv_nir_stage_uses_xfb(nir[last_xfb_stage]); + + if (!device->physical_device->use_ngg_streamout && uses_xfb) { if (nir[MESA_SHADER_TESS_CTRL]) keys[MESA_SHADER_TESS_EVAL].vs_common_out.as_ngg = false; else keys[MESA_SHADER_VERTEX].vs_common_out.as_ngg = false; } - if (!device->physical_device->use_ngg_streamout) { - gl_shader_stage last_xfb_stage = MESA_SHADER_VERTEX; - - for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) { - if (nir[i]) - last_xfb_stage = i; - } - - if (nir[last_xfb_stage] && - radv_nir_stage_uses_xfb(nir[last_xfb_stage])) { - if (nir[MESA_SHADER_TESS_CTRL]) - keys[MESA_SHADER_TESS_EVAL].vs_common_out.as_ngg = false; - else - keys[MESA_SHADER_VERTEX].vs_common_out.as_ngg = false; + /* Determine if the pipeline is eligible for the NGG passthrough + * mode. It can't be enabled for geometry shaders, for NGG + * streamout or for vertex shaders that export the primitive ID + * (this is checked later because we don't have the info here.) + */ + if (!nir[MESA_SHADER_GEOMETRY] && !uses_xfb) { + if (nir[MESA_SHADER_TESS_CTRL] && + keys[MESA_SHADER_TESS_EVAL].vs_common_out.as_ngg) { + keys[MESA_SHADER_TESS_EVAL].vs_common_out.as_ngg_passthrough = true; + } else if (nir[MESA_SHADER_VERTEX] && + keys[MESA_SHADER_VERTEX].vs_common_out.as_ngg) { + keys[MESA_SHADER_VERTEX].vs_common_out.as_ngg_passthrough = true; } } } @@ -2487,6 +2588,17 @@ radv_get_wave_size(struct radv_device *device, return device->physical_device->ge_wave_size; } +static uint8_t +radv_get_ballot_bit_size(struct radv_device *device, + const VkPipelineShaderStageCreateInfo *pStage, + gl_shader_stage stage, + const struct radv_shader_variant_key *key) +{ + if (stage == MESA_SHADER_COMPUTE && key->cs.subgroup_size) + return key->cs.subgroup_size; + return 64; +} + static void radv_fill_shader_info(struct radv_pipeline *pipeline, const VkPipelineShaderStageCreateInfo **pStages, @@ -2507,7 +2619,8 @@ radv_fill_shader_info(struct radv_pipeline *pipeline, radv_nir_shader_info_pass(nir[MESA_SHADER_FRAGMENT], pipeline->layout, &keys[MESA_SHADER_FRAGMENT], - &infos[MESA_SHADER_FRAGMENT]); + &infos[MESA_SHADER_FRAGMENT], + pipeline->device->physical_device->use_aco); /* TODO: These are no longer used as keys we should refactor this */ keys[MESA_SHADER_VERTEX].vs_common_out.export_prim_id = @@ -2516,16 +2629,37 @@ radv_fill_shader_info(struct radv_pipeline *pipeline, infos[MESA_SHADER_FRAGMENT].ps.layer_input; keys[MESA_SHADER_VERTEX].vs_common_out.export_clip_dists = !!infos[MESA_SHADER_FRAGMENT].ps.num_input_clips_culls; + keys[MESA_SHADER_VERTEX].vs_common_out.export_viewport_index = + infos[MESA_SHADER_FRAGMENT].ps.viewport_index_input; keys[MESA_SHADER_TESS_EVAL].vs_common_out.export_prim_id = infos[MESA_SHADER_FRAGMENT].ps.prim_id_input; keys[MESA_SHADER_TESS_EVAL].vs_common_out.export_layer_id = infos[MESA_SHADER_FRAGMENT].ps.layer_input; keys[MESA_SHADER_TESS_EVAL].vs_common_out.export_clip_dists = !!infos[MESA_SHADER_FRAGMENT].ps.num_input_clips_culls; + keys[MESA_SHADER_TESS_EVAL].vs_common_out.export_viewport_index = + infos[MESA_SHADER_FRAGMENT].ps.viewport_index_input; + + /* NGG passthrough mode can't be enabled for vertex shaders + * that export the primitive ID. + * + * TODO: I should really refactor the keys logic. + */ + if (nir[MESA_SHADER_VERTEX] && + keys[MESA_SHADER_VERTEX].vs_common_out.export_prim_id) { + keys[MESA_SHADER_VERTEX].vs_common_out.as_ngg_passthrough = false; + } filled_stages |= (1 << MESA_SHADER_FRAGMENT); } + if (nir[MESA_SHADER_TESS_CTRL]) { + infos[MESA_SHADER_TESS_CTRL].tcs.tes_inputs_read = + nir[MESA_SHADER_TESS_EVAL]->info.inputs_read; + infos[MESA_SHADER_TESS_CTRL].tcs.tes_patch_inputs_read = + nir[MESA_SHADER_TESS_EVAL]->info.patch_inputs_read; + } + if (pipeline->device->physical_device->rad_info.chip_class >= GFX9 && nir[MESA_SHADER_TESS_CTRL]) { struct nir_shader *combined_nir[] = {nir[MESA_SHADER_VERTEX], nir[MESA_SHADER_TESS_CTRL]}; @@ -2537,7 +2671,8 @@ radv_fill_shader_info(struct radv_pipeline *pipeline, for (int i = 0; i < 2; i++) { radv_nir_shader_info_pass(combined_nir[i], pipeline->layout, &key, - &infos[MESA_SHADER_TESS_CTRL]); + &infos[MESA_SHADER_TESS_CTRL], + pipeline->device->physical_device->use_aco); } keys[MESA_SHADER_TESS_EVAL].tes.num_patches = @@ -2560,7 +2695,8 @@ radv_fill_shader_info(struct radv_pipeline *pipeline, radv_nir_shader_info_pass(combined_nir[i], pipeline->layout, &keys[pre_stage], - &infos[MESA_SHADER_GEOMETRY]); + &infos[MESA_SHADER_GEOMETRY], + pipeline->device->physical_device->use_aco); } filled_stages |= (1 << pre_stage); @@ -2585,14 +2721,19 @@ radv_fill_shader_info(struct radv_pipeline *pipeline, radv_nir_shader_info_init(&infos[i]); radv_nir_shader_info_pass(nir[i], pipeline->layout, - &keys[i], &infos[i]); + &keys[i], &infos[i], pipeline->device->physical_device->use_aco); } for (int i = 0; i < MESA_SHADER_STAGES; i++) { - if (nir[i]) + if (nir[i]) { infos[i].wave_size = radv_get_wave_size(pipeline->device, pStages[i], i, &keys[i]); + infos[i].ballot_bit_size = + radv_get_ballot_bit_size(pipeline->device, + pStages[i], i, + &keys[i]); + } } } @@ -2673,14 +2814,6 @@ void radv_stop_feedback(VkPipelineCreationFeedbackEXT *feedback, bool cache_hit) (cache_hit ? VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT : 0); } -static -bool radv_aco_supported_stage(gl_shader_stage stage, bool has_gs, bool has_ts) -{ - return (stage == MESA_SHADER_VERTEX && !has_gs && !has_ts) || - stage == MESA_SHADER_FRAGMENT || - stage == MESA_SHADER_COMPUTE; -} - void radv_create_shaders(struct radv_pipeline *pipeline, struct radv_device *device, struct radv_pipeline_cache *cache, @@ -2698,6 +2831,7 @@ void radv_create_shaders(struct radv_pipeline *pipeline, struct radv_shader_info infos[MESA_SHADER_STAGES] = {0}; unsigned char hash[20], gs_copy_hash[20]; bool keep_executable_info = (flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR) || device->keep_shader_info; + bool keep_statistic_info = (flags & VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR) || device->keep_shader_info; radv_start_feedback(pipeline_feedback); @@ -2718,14 +2852,14 @@ void radv_create_shaders(struct radv_pipeline *pipeline, gs_copy_hash[0] ^= 1; bool found_in_application_cache = true; - if (modules[MESA_SHADER_GEOMETRY] && !keep_executable_info) { + if (modules[MESA_SHADER_GEOMETRY] && !keep_executable_info && !keep_statistic_info) { struct radv_shader_variant *variants[MESA_SHADER_STAGES] = {0}; radv_create_shader_variants_from_pipeline_cache(device, cache, gs_copy_hash, variants, &found_in_application_cache); pipeline->gs_copy_shader = variants[MESA_SHADER_GEOMETRY]; } - if (!keep_executable_info && + if (!keep_executable_info && !keep_statistic_info && radv_create_shader_variants_from_pipeline_cache(device, cache, hash, pipeline->shaders, &found_in_application_cache) && (!modules[MESA_SHADER_GEOMETRY] || pipeline->gs_copy_shader)) { @@ -2741,23 +2875,29 @@ void radv_create_shaders(struct radv_pipeline *pipeline, modules[MESA_SHADER_FRAGMENT] = &fs_m; } - bool has_gs = modules[MESA_SHADER_GEOMETRY]; - bool has_ts = modules[MESA_SHADER_TESS_CTRL] || modules[MESA_SHADER_TESS_EVAL]; - bool use_aco = device->physical_device->use_aco; - for (unsigned i = 0; i < MESA_SHADER_STAGES; ++i) { const VkPipelineShaderStageCreateInfo *stage = pStages[i]; + unsigned subgroup_size = 64, ballot_bit_size = 64; if (!modules[i]) continue; radv_start_feedback(stage_feedbacks[i]); - bool aco = use_aco && radv_aco_supported_stage(i, has_gs, has_ts); + if (key->compute_subgroup_size) { + /* Only compute shaders currently support requiring a + * specific subgroup size. + */ + assert(i == MESA_SHADER_COMPUTE); + subgroup_size = key->compute_subgroup_size; + ballot_bit_size = key->compute_subgroup_size; + } + nir[i] = radv_shader_compile_to_nir(device, modules[i], stage ? stage->pName : "main", i, stage ? stage->pSpecializationInfo : NULL, - flags, pipeline->layout, aco); + flags, pipeline->layout, + subgroup_size, ballot_bit_size); /* We don't want to alter meta shaders IR directly so clone it * first. @@ -2777,26 +2917,32 @@ void radv_create_shaders(struct radv_pipeline *pipeline, if (!(flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT)) radv_link_shaders(pipeline, nir); + radv_set_linked_driver_locations(pipeline, nir, infos); + for (int i = 0; i < MESA_SHADER_STAGES; ++i) { if (nir[i]) { - NIR_PASS_V(nir[i], nir_lower_non_uniform_access, - nir_lower_non_uniform_ubo_access | - nir_lower_non_uniform_ssbo_access | - nir_lower_non_uniform_texture_access | - nir_lower_non_uniform_image_access); - - bool aco = use_aco && radv_aco_supported_stage(i, has_gs, has_ts); - if (!aco) + /* do this again since information such as outputs_read can be out-of-date */ + nir_shader_gather_info(nir[i], nir_shader_get_entrypoint(nir[i])); + + if (device->physical_device->use_aco) { + NIR_PASS_V(nir[i], nir_lower_non_uniform_access, + nir_lower_non_uniform_ubo_access | + nir_lower_non_uniform_ssbo_access | + nir_lower_non_uniform_texture_access | + nir_lower_non_uniform_image_access); + } else NIR_PASS_V(nir[i], nir_lower_bool_to_int32); } - - if (radv_can_dump_shader(device, modules[i], false)) - nir_print_shader(nir[i], stderr); } if (nir[MESA_SHADER_FRAGMENT]) radv_lower_fs_io(nir[MESA_SHADER_FRAGMENT]); + for (int i = 0; i < MESA_SHADER_STAGES; ++i) { + if (radv_can_dump_shader(device, modules[i], false)) + nir_print_shader(nir[i], stderr); + } + radv_fill_shader_keys(device, keys, key, nir); radv_fill_shader_info(pipeline, pStages, keys, infos, nir); @@ -2822,16 +2968,52 @@ void radv_create_shaders(struct radv_pipeline *pipeline, gfx9_get_gs_info(key, pipeline, nir, infos, gs_info); } + if(modules[MESA_SHADER_GEOMETRY]) { + struct radv_shader_binary *gs_copy_binary = NULL; + if (!pipeline->gs_copy_shader && + !radv_pipeline_has_ngg(pipeline)) { + struct radv_shader_info info = {}; + struct radv_shader_variant_key key = {}; + + key.has_multiview_view_index = + keys[MESA_SHADER_GEOMETRY].has_multiview_view_index; + + radv_nir_shader_info_pass(nir[MESA_SHADER_GEOMETRY], + pipeline->layout, &key, + &info, pipeline->device->physical_device->use_aco); + info.wave_size = 64; /* Wave32 not supported. */ + info.ballot_bit_size = 64; + + pipeline->gs_copy_shader = radv_create_gs_copy_shader( + device, nir[MESA_SHADER_GEOMETRY], &info, + &gs_copy_binary, keep_executable_info, keep_statistic_info, + keys[MESA_SHADER_GEOMETRY].has_multiview_view_index); + } + + if (!keep_executable_info && !keep_statistic_info && pipeline->gs_copy_shader) { + struct radv_shader_binary *binaries[MESA_SHADER_STAGES] = {NULL}; + struct radv_shader_variant *variants[MESA_SHADER_STAGES] = {0}; + + binaries[MESA_SHADER_GEOMETRY] = gs_copy_binary; + variants[MESA_SHADER_GEOMETRY] = pipeline->gs_copy_shader; + + radv_pipeline_cache_insert_shaders(device, cache, + gs_copy_hash, + variants, + binaries); + } + free(gs_copy_binary); + } + if (nir[MESA_SHADER_FRAGMENT]) { if (!pipeline->shaders[MESA_SHADER_FRAGMENT]) { radv_start_feedback(stage_feedbacks[MESA_SHADER_FRAGMENT]); - bool aco = use_aco && radv_aco_supported_stage(MESA_SHADER_FRAGMENT, has_gs, has_ts); pipeline->shaders[MESA_SHADER_FRAGMENT] = radv_shader_variant_compile(device, modules[MESA_SHADER_FRAGMENT], &nir[MESA_SHADER_FRAGMENT], 1, pipeline->layout, keys + MESA_SHADER_FRAGMENT, infos + MESA_SHADER_FRAGMENT, - keep_executable_info, aco, + keep_executable_info, keep_statistic_info, &binaries[MESA_SHADER_FRAGMENT]); radv_stop_feedback(stage_feedbacks[MESA_SHADER_FRAGMENT], false); @@ -2849,7 +3031,7 @@ void radv_create_shaders(struct radv_pipeline *pipeline, pipeline->shaders[MESA_SHADER_TESS_CTRL] = radv_shader_variant_compile(device, modules[MESA_SHADER_TESS_CTRL], combined_nir, 2, pipeline->layout, &key, &infos[MESA_SHADER_TESS_CTRL], keep_executable_info, - false, &binaries[MESA_SHADER_TESS_CTRL]); + keep_statistic_info, &binaries[MESA_SHADER_TESS_CTRL]); radv_stop_feedback(stage_feedbacks[MESA_SHADER_TESS_CTRL], false); } @@ -2868,7 +3050,7 @@ void radv_create_shaders(struct radv_pipeline *pipeline, pipeline->shaders[MESA_SHADER_GEOMETRY] = radv_shader_variant_compile(device, modules[MESA_SHADER_GEOMETRY], combined_nir, 2, pipeline->layout, &keys[pre_stage], &infos[MESA_SHADER_GEOMETRY], keep_executable_info, - false, &binaries[MESA_SHADER_GEOMETRY]); + keep_statistic_info, &binaries[MESA_SHADER_GEOMETRY]); radv_stop_feedback(stage_feedbacks[MESA_SHADER_GEOMETRY], false); } @@ -2887,53 +3069,16 @@ void radv_create_shaders(struct radv_pipeline *pipeline, radv_start_feedback(stage_feedbacks[i]); - bool aco = use_aco && radv_aco_supported_stage(i, has_gs, has_ts); pipeline->shaders[i] = radv_shader_variant_compile(device, modules[i], &nir[i], 1, pipeline->layout, - keys + i, infos + i,keep_executable_info, - aco, &binaries[i]); + keys + i, infos + i, keep_executable_info, + keep_statistic_info, &binaries[i]); radv_stop_feedback(stage_feedbacks[i], false); } } - if(modules[MESA_SHADER_GEOMETRY]) { - struct radv_shader_binary *gs_copy_binary = NULL; - if (!pipeline->gs_copy_shader && - !radv_pipeline_has_ngg(pipeline)) { - struct radv_shader_info info = {}; - struct radv_shader_variant_key key = {}; - - key.has_multiview_view_index = - keys[MESA_SHADER_GEOMETRY].has_multiview_view_index; - - radv_nir_shader_info_pass(nir[MESA_SHADER_GEOMETRY], - pipeline->layout, &key, - &info); - info.wave_size = 64; /* Wave32 not supported. */ - - pipeline->gs_copy_shader = radv_create_gs_copy_shader( - device, nir[MESA_SHADER_GEOMETRY], &info, - &gs_copy_binary, keep_executable_info, - keys[MESA_SHADER_GEOMETRY].has_multiview_view_index); - } - - if (!keep_executable_info && pipeline->gs_copy_shader) { - struct radv_shader_binary *binaries[MESA_SHADER_STAGES] = {NULL}; - struct radv_shader_variant *variants[MESA_SHADER_STAGES] = {0}; - - binaries[MESA_SHADER_GEOMETRY] = gs_copy_binary; - variants[MESA_SHADER_GEOMETRY] = pipeline->gs_copy_shader; - - radv_pipeline_cache_insert_shaders(device, cache, - gs_copy_hash, - variants, - binaries); - } - free(gs_copy_binary); - } - - if (!keep_executable_info) { + if (!keep_executable_info && !keep_statistic_info) { radv_pipeline_cache_insert_shaders(device, cache, hash, pipeline->shaders, binaries); } @@ -3432,8 +3577,13 @@ radv_get_binning_settings(const struct radv_physical_device *pdev) { struct radv_binning_settings settings; if (pdev->rad_info.has_dedicated_vram) { - settings.context_states_per_bin = 1; - settings.persistent_states_per_bin = 1; + if (pdev->rad_info.num_render_backends > 4) { + settings.context_states_per_bin = 1; + settings.persistent_states_per_bin = 1; + } else { + settings.context_states_per_bin = 3; + settings.persistent_states_per_bin = 8; + } settings.fpovs_per_batch = 63; } else { /* The context states are affected by the scissor bug. */ @@ -3550,9 +3700,9 @@ radv_pipeline_generate_depth_stencil_state(struct radeon_cmdbuf *ctx_cs, db_render_control |= S_028000_DEPTH_CLEAR_ENABLE(extra->db_depth_clear); db_render_control |= S_028000_STENCIL_CLEAR_ENABLE(extra->db_stencil_clear); - db_render_control |= S_028000_RESUMMARIZE_ENABLE(extra->db_resummarize); - db_render_control |= S_028000_DEPTH_COMPRESS_DISABLE(extra->db_flush_depth_inplace); - db_render_control |= S_028000_STENCIL_COMPRESS_DISABLE(extra->db_flush_stencil_inplace); + db_render_control |= S_028000_RESUMMARIZE_ENABLE(extra->resummarize_enable); + db_render_control |= S_028000_DEPTH_COMPRESS_DISABLE(extra->depth_compress_disable); + db_render_control |= S_028000_STENCIL_COMPRESS_DISABLE(extra->stencil_compress_disable); db_render_override2 |= S_028010_DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION(extra->db_depth_disable_expclear); db_render_override2 |= S_028010_DISABLE_SMEM_EXPCLEAR_OPTIMIZATION(extra->db_stencil_disable_expclear); } @@ -3950,7 +4100,7 @@ radv_pipeline_generate_hw_ngg(struct radeon_cmdbuf *ctx_cs, radeon_set_context_reg(ctx_cs, R_028A84_VGT_PRIMITIVEID_EN, S_028A84_PRIMITIVEID_EN(es_enable_prim_id) | - S_028A84_NGG_DISABLE_PROVOK_REUSE(es_enable_prim_id)); + S_028A84_NGG_DISABLE_PROVOK_REUSE(outinfo->export_prim_id)); radeon_set_context_reg(ctx_cs, R_028AAC_VGT_ESGS_RING_ITEMSIZE, ngg_state->vgt_esgs_ring_itemsize); @@ -3994,9 +4144,7 @@ radv_pipeline_generate_hw_ngg(struct radeon_cmdbuf *ctx_cs, * * Requirement: GE_CNTL.VERT_GRP_SIZE = VGT_GS_ONCHIP_CNTL.ES_VERTS_PER_SUBGRP - 5 */ - if ((pipeline->device->physical_device->rad_info.family == CHIP_NAVI10 || - pipeline->device->physical_device->rad_info.family == CHIP_NAVI12 || - pipeline->device->physical_device->rad_info.family == CHIP_NAVI14) && + if (pipeline->device->physical_device->rad_info.chip_class == GFX10 && !radv_pipeline_has_tess(pipeline) && ngg_state->hw_max_esverts != 256) { ge_cntl &= C_03096C_VERT_GRP_SIZE; @@ -4210,13 +4358,20 @@ radv_pipeline_generate_geometry_shader(struct radeon_cmdbuf *ctx_cs, gs->info.gs.vertices_out); } -static uint32_t offset_to_ps_input(uint32_t offset, bool flat_shade, bool float16) +static uint32_t offset_to_ps_input(uint32_t offset, bool flat_shade, + bool explicit, bool float16) { uint32_t ps_input_cntl; if (offset <= AC_EXP_PARAM_OFFSET_31) { ps_input_cntl = S_028644_OFFSET(offset); - if (flat_shade) + if (flat_shade || explicit) ps_input_cntl |= S_028644_FLAT_SHADE(1); + if (explicit) { + /* Force parameter cache to be read in passthrough + * mode. + */ + ps_input_cntl |= S_028644_OFFSET(1 << 5); + } if (float16) { ps_input_cntl |= S_028644_FP16_INTERP_MODE(1) | S_028644_ATTR0_VALID(1); @@ -4245,7 +4400,7 @@ radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf *ctx_cs, if (ps->info.ps.prim_id_input) { unsigned vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID]; if (vs_offset != AC_EXP_PARAM_UNDEFINED) { - ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true, false); + ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true, false, false); ++ps_offset; } } @@ -4254,9 +4409,18 @@ radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf *ctx_cs, ps->info.needs_multiview_view_index) { unsigned vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_LAYER]; if (vs_offset != AC_EXP_PARAM_UNDEFINED) - ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true, false); + ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true, false, false); else - ps_input_cntl[ps_offset] = offset_to_ps_input(AC_EXP_PARAM_DEFAULT_VAL_0000, true, false); + ps_input_cntl[ps_offset] = offset_to_ps_input(AC_EXP_PARAM_DEFAULT_VAL_0000, true, false, false); + ++ps_offset; + } + + if (ps->info.ps.viewport_index_input) { + unsigned vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_VIEWPORT]; + if (vs_offset != AC_EXP_PARAM_UNDEFINED) + ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true, false, false); + else + ps_input_cntl[ps_offset] = offset_to_ps_input(AC_EXP_PARAM_DEFAULT_VAL_0000, true, false, false); ++ps_offset; } @@ -4272,14 +4436,14 @@ radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf *ctx_cs, vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST0]; if (vs_offset != AC_EXP_PARAM_UNDEFINED) { - ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, false, false); + ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, false, false, false); ++ps_offset; } vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST1]; if (vs_offset != AC_EXP_PARAM_UNDEFINED && ps->info.ps.num_input_clips_culls > 4) { - ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, false, false); + ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, false, false, false); ++ps_offset; } } @@ -4287,6 +4451,7 @@ radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf *ctx_cs, for (unsigned i = 0; i < 32 && (1u << i) <= ps->info.ps.input_mask; ++i) { unsigned vs_offset; bool flat_shade; + bool explicit; bool float16; if (!(ps->info.ps.input_mask & (1u << i))) continue; @@ -4299,9 +4464,10 @@ radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf *ctx_cs, } flat_shade = !!(ps->info.ps.flat_shaded_mask & (1u << ps_offset)); + explicit = !!(ps->info.ps.explicit_shaded_mask & (1u << ps_offset)); float16 = !!(ps->info.ps.float16_shaded_mask & (1u << ps_offset)); - ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, flat_shade, float16); + ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, flat_shade, explicit, float16); ++ps_offset; } @@ -4434,6 +4600,8 @@ radv_compute_vgt_shader_stages_en(const struct radv_pipeline *pipeline) stages |= S_028B54_PRIMGEN_EN(1); if (pipeline->streamout_shader) stages |= S_028B54_NGG_WAVE_ID_EN(1); + if (radv_pipeline_has_ngg_passthrough(pipeline)) + stages |= S_028B54_PRIMGEN_PASSTHRU_EN(1); } else if (radv_pipeline_has_gs(pipeline)) { stages |= S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER); } @@ -4978,6 +5146,7 @@ radv_pipeline_init(struct radv_pipeline *pipeline, uint32_t gs_out; uint32_t prim = si_translate_prim(pCreateInfo->pInputAssemblyState->topology); + pipeline->graphics.topology = pCreateInfo->pInputAssemblyState->topology; pipeline->graphics.can_use_guardband = radv_prim_can_use_guardband(pCreateInfo->pInputAssemblyState->topology); if (radv_pipeline_has_gs(pipeline)) { @@ -5093,11 +5262,14 @@ radv_graphics_pipeline_create( struct radv_pipeline *pipeline; VkResult result; - pipeline = vk_zalloc2(&device->alloc, pAllocator, sizeof(*pipeline), 8, + pipeline = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (pipeline == NULL) return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + vk_object_base_init(&device->vk, &pipeline->base, + VK_OBJECT_TYPE_PIPELINE); + result = radv_pipeline_init(pipeline, device, cache, pCreateInfo, extra); if (result != VK_SUCCESS) { @@ -5231,11 +5403,14 @@ static VkResult radv_compute_pipeline_create( struct radv_pipeline *pipeline; VkResult result; - pipeline = vk_zalloc2(&device->alloc, pAllocator, sizeof(*pipeline), 8, + pipeline = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (pipeline == NULL) return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + vk_object_base_init(&device->vk, &pipeline->base, + VK_OBJECT_TYPE_PIPELINE); + pipeline->device = device; pipeline->layout = radv_pipeline_layout_from_handle(pCreateInfo->layout); assert(pipeline->layout); @@ -5539,6 +5714,20 @@ VkResult radv_GetPipelineExecutableStatisticsKHR( } ++s; + if (shader->statistics) { + for (unsigned i = 0; i < shader->statistics->count; i++) { + struct radv_compiler_statistic_info *info = &shader->statistics->infos[i]; + uint32_t value = shader->statistics->values[i]; + if (s < end) { + desc_copy(s->name, info->name); + desc_copy(s->description, info->desc); + s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR; + s->value.u64 = value; + } + ++s; + } + } + if (!pStatistics) *pStatisticCount = s - pStatistics; else if (s > end) { @@ -5594,7 +5783,7 @@ VkResult radv_GetPipelineExecutableInternalRepresentationsKHR( /* backend IR */ if (p < end) { p->isText = true; - if (shader->aco_used) { + if (pipeline->device->physical_device->use_aco) { desc_copy(p->name, "ACO IR"); desc_copy(p->description, "The ACO IR after some optimizations"); } else {