From 66c703b3e8a6a7e3c03e577c8deb377536ce5af2 Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Mon, 11 Nov 2019 18:05:03 +0100 Subject: [PATCH] radv: Move argument declaration out of nir_to_llvm Now it's executed for ACO too. Reviewed-by: Samuel Pitoiset --- src/amd/vulkan/meson.build | 1 + src/amd/vulkan/radv_nir_to_llvm.c | 825 ++---------------------------- src/amd/vulkan/radv_private.h | 11 +- src/amd/vulkan/radv_shader.c | 18 +- src/amd/vulkan/radv_shader_args.c | 747 +++++++++++++++++++++++++++ src/amd/vulkan/radv_shader_args.h | 5 + 6 files changed, 823 insertions(+), 784 deletions(-) create mode 100644 src/amd/vulkan/radv_shader_args.c diff --git a/src/amd/vulkan/meson.build b/src/amd/vulkan/meson.build index 37e76cc41a9..e620478a43b 100644 --- a/src/amd/vulkan/meson.build +++ b/src/amd/vulkan/meson.build @@ -101,6 +101,7 @@ libradv_files = files( 'radv_radeon_winsys.h', 'radv_shader.c', 'radv_shader.h', + 'radv_shader_args.c', 'radv_shader_args.h', 'radv_shader_helper.h', 'radv_shader_info.c', diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c index 060dbcf2afb..11f983974d6 100644 --- a/src/amd/vulkan/radv_nir_to_llvm.c +++ b/src/amd/vulkan/radv_nir_to_llvm.c @@ -318,7 +318,7 @@ get_tcs_out_current_patch_data_offset(struct radv_shader_context *ctx) static LLVMValueRef create_llvm_function(struct ac_llvm_context *ctx, LLVMModuleRef module, LLVMBuilderRef builder, - struct ac_shader_args *args, + const struct ac_shader_args *args, enum ac_llvm_calling_convention convention, unsigned max_workgroup_size, const struct radv_nir_compiler_options *options) @@ -337,385 +337,6 @@ create_llvm_function(struct ac_llvm_context *ctx, LLVMModuleRef module, return main_function; } - -static void -set_loc(struct radv_userdata_info *ud_info, uint8_t *sgpr_idx, - uint8_t num_sgprs) -{ - ud_info->sgpr_idx = *sgpr_idx; - ud_info->num_sgprs = num_sgprs; - *sgpr_idx += num_sgprs; -} - -static void -set_loc_shader(struct radv_shader_args *args, int idx, uint8_t *sgpr_idx, - uint8_t num_sgprs) -{ - struct radv_userdata_info *ud_info = - &args->shader_info->user_sgprs_locs.shader_data[idx]; - assert(ud_info); - - set_loc(ud_info, sgpr_idx, num_sgprs); -} - -static void -set_loc_shader_ptr(struct radv_shader_args *args, int idx, uint8_t *sgpr_idx) -{ - bool use_32bit_pointers = idx != AC_UD_SCRATCH_RING_OFFSETS; - - set_loc_shader(args, idx, sgpr_idx, use_32bit_pointers ? 1 : 2); -} - -static void -set_loc_desc(struct radv_shader_args *args, int idx, uint8_t *sgpr_idx) -{ - struct radv_userdata_locations *locs = - &args->shader_info->user_sgprs_locs; - struct radv_userdata_info *ud_info = &locs->descriptor_sets[idx]; - assert(ud_info); - - set_loc(ud_info, sgpr_idx, 1); - - locs->descriptor_sets_enabled |= 1 << idx; -} - -struct user_sgpr_info { - bool need_ring_offsets; - bool indirect_all_descriptor_sets; - uint8_t remaining_sgprs; -}; - -static bool needs_view_index_sgpr(struct radv_shader_args *args, - gl_shader_stage stage) -{ - switch (stage) { - case MESA_SHADER_VERTEX: - if (args->shader_info->needs_multiview_view_index || - (!args->options->key.vs_common_out.as_es && !args->options->key.vs_common_out.as_ls && args->options->key.has_multiview_view_index)) - return true; - break; - case MESA_SHADER_TESS_EVAL: - if (args->shader_info->needs_multiview_view_index || (!args->options->key.vs_common_out.as_es && args->options->key.has_multiview_view_index)) - return true; - break; - case MESA_SHADER_GEOMETRY: - case MESA_SHADER_TESS_CTRL: - if (args->shader_info->needs_multiview_view_index) - return true; - break; - default: - break; - } - return false; -} - -static uint8_t -count_vs_user_sgprs(struct radv_shader_args *args) -{ - uint8_t count = 0; - - if (args->shader_info->vs.has_vertex_buffers) - count++; - count += args->shader_info->vs.needs_draw_id ? 3 : 2; - - return count; -} - -static void allocate_inline_push_consts(struct radv_shader_args *args, - struct user_sgpr_info *user_sgpr_info) -{ - uint8_t remaining_sgprs = user_sgpr_info->remaining_sgprs; - - /* Only supported if shaders use push constants. */ - if (args->shader_info->min_push_constant_used == UINT8_MAX) - return; - - /* Only supported if shaders don't have indirect push constants. */ - if (args->shader_info->has_indirect_push_constants) - return; - - /* Only supported for 32-bit push constants. */ - if (!args->shader_info->has_only_32bit_push_constants) - return; - - uint8_t num_push_consts = - (args->shader_info->max_push_constant_used - - args->shader_info->min_push_constant_used) / 4; - - /* Check if the number of user SGPRs is large enough. */ - if (num_push_consts < remaining_sgprs) { - args->shader_info->num_inline_push_consts = num_push_consts; - } else { - args->shader_info->num_inline_push_consts = remaining_sgprs; - } - - /* Clamp to the maximum number of allowed inlined push constants. */ - if (args->shader_info->num_inline_push_consts > AC_MAX_INLINE_PUSH_CONSTS) - args->shader_info->num_inline_push_consts = AC_MAX_INLINE_PUSH_CONSTS; - - if (args->shader_info->num_inline_push_consts == num_push_consts && - !args->shader_info->loads_dynamic_offsets) { - /* Disable the default push constants path if all constants are - * inlined and if shaders don't use dynamic descriptors. - */ - args->shader_info->loads_push_constants = false; - } - - args->shader_info->base_inline_push_consts = - args->shader_info->min_push_constant_used / 4; -} - -static void allocate_user_sgprs(struct radv_shader_args *args, - gl_shader_stage stage, - bool has_previous_stage, - gl_shader_stage previous_stage, - bool needs_view_index, - struct user_sgpr_info *user_sgpr_info) -{ - uint8_t user_sgpr_count = 0; - - memset(user_sgpr_info, 0, sizeof(struct user_sgpr_info)); - - /* until we sort out scratch/global buffers always assign ring offsets for gs/vs/es */ - if (stage == MESA_SHADER_GEOMETRY || - stage == MESA_SHADER_VERTEX || - stage == MESA_SHADER_TESS_CTRL || - stage == MESA_SHADER_TESS_EVAL || - args->is_gs_copy_shader) - user_sgpr_info->need_ring_offsets = true; - - if (stage == MESA_SHADER_FRAGMENT && - args->shader_info->ps.needs_sample_positions) - user_sgpr_info->need_ring_offsets = true; - - /* 2 user sgprs will nearly always be allocated for scratch/rings */ - if (args->options->supports_spill || user_sgpr_info->need_ring_offsets) { - user_sgpr_count += 2; - } - - switch (stage) { - case MESA_SHADER_COMPUTE: - if (args->shader_info->cs.uses_grid_size) - user_sgpr_count += 3; - break; - case MESA_SHADER_FRAGMENT: - user_sgpr_count += args->shader_info->ps.needs_sample_positions; - break; - case MESA_SHADER_VERTEX: - if (!args->is_gs_copy_shader) - user_sgpr_count += count_vs_user_sgprs(args); - break; - case MESA_SHADER_TESS_CTRL: - if (has_previous_stage) { - if (previous_stage == MESA_SHADER_VERTEX) - user_sgpr_count += count_vs_user_sgprs(args); - } - break; - case MESA_SHADER_TESS_EVAL: - break; - case MESA_SHADER_GEOMETRY: - if (has_previous_stage) { - if (previous_stage == MESA_SHADER_VERTEX) { - user_sgpr_count += count_vs_user_sgprs(args); - } - } - break; - default: - break; - } - - if (needs_view_index) - user_sgpr_count++; - - if (args->shader_info->loads_push_constants) - user_sgpr_count++; - - if (args->shader_info->so.num_outputs) - user_sgpr_count++; - - uint32_t available_sgprs = args->options->chip_class >= GFX9 && stage != MESA_SHADER_COMPUTE ? 32 : 16; - uint32_t remaining_sgprs = available_sgprs - user_sgpr_count; - uint32_t num_desc_set = - util_bitcount(args->shader_info->desc_set_used_mask); - - if (remaining_sgprs < num_desc_set) { - user_sgpr_info->indirect_all_descriptor_sets = true; - user_sgpr_info->remaining_sgprs = remaining_sgprs - 1; - } else { - user_sgpr_info->remaining_sgprs = remaining_sgprs - num_desc_set; - } - - allocate_inline_push_consts(args, user_sgpr_info); -} - -static void -declare_global_input_sgprs(struct radv_shader_args *args, - const struct user_sgpr_info *user_sgpr_info) -{ - /* 1 for each descriptor set */ - if (!user_sgpr_info->indirect_all_descriptor_sets) { - uint32_t mask = args->shader_info->desc_set_used_mask; - - while (mask) { - int i = u_bit_scan(&mask); - - ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_PTR, - &args->descriptor_sets[i]); - } - } else { - ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_PTR_PTR, - &args->descriptor_sets[0]); - } - - if (args->shader_info->loads_push_constants) { - /* 1 for push constants and dynamic descriptors */ - ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_PTR, - &args->ac.push_constants); - } - - for (unsigned i = 0; i < args->shader_info->num_inline_push_consts; i++) { - ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, - &args->ac.inline_push_consts[i]); - } - args->ac.num_inline_push_consts = args->shader_info->num_inline_push_consts; - args->ac.base_inline_push_consts = args->shader_info->base_inline_push_consts; - - if (args->shader_info->so.num_outputs) { - ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_DESC_PTR, - &args->streamout_buffers); - } -} - -static void -declare_vs_specific_input_sgprs(struct radv_shader_args *args, - gl_shader_stage stage, - bool has_previous_stage, - gl_shader_stage previous_stage) -{ - if (!args->is_gs_copy_shader && - (stage == MESA_SHADER_VERTEX || - (has_previous_stage && previous_stage == MESA_SHADER_VERTEX))) { - if (args->shader_info->vs.has_vertex_buffers) { - ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_DESC_PTR, - &args->vertex_buffers); - } - ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.base_vertex); - ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.start_instance); - if (args->shader_info->vs.needs_draw_id) { - ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.draw_id); - } - } -} - -static void -declare_vs_input_vgprs(struct radv_shader_args *args) -{ - ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vertex_id); - if (!args->is_gs_copy_shader) { - if (args->options->key.vs_common_out.as_ls) { - ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->rel_auto_id); - if (args->options->chip_class >= GFX10) { - ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */ - ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id); - } else { - ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id); - ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* unused */ - } - } else { - if (args->options->chip_class >= GFX10) { - if (args->options->key.vs_common_out.as_ngg) { - ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */ - ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */ - ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id); - } else { - ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* unused */ - ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->vs_prim_id); - ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id); - } - } else { - ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id); - ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->vs_prim_id); - ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* unused */ - } - } - } -} - -static void -declare_streamout_sgprs(struct radv_shader_args *args, gl_shader_stage stage) -{ - int i; - - if (args->options->use_ngg_streamout) { - if (stage == MESA_SHADER_TESS_EVAL) - ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); - return; - } - - /* Streamout SGPRs. */ - if (args->shader_info->so.num_outputs) { - assert(stage == MESA_SHADER_VERTEX || - stage == MESA_SHADER_TESS_EVAL); - - ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->streamout_config); - ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->streamout_write_idx); - } else if (stage == MESA_SHADER_TESS_EVAL) { - ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); - } - - /* A streamout buffer offset is loaded if the stride is non-zero. */ - for (i = 0; i < 4; i++) { - if (!args->shader_info->so.strides[i]) - continue; - - ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->streamout_offset[i]); - } -} - -static void -declare_tes_input_vgprs(struct radv_shader_args *args) -{ - ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->tes_u); - ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->tes_v); - ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->tes_rel_patch_id); - ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tes_patch_id); -} - -static void -set_global_input_locs(struct radv_shader_args *args, - const struct user_sgpr_info *user_sgpr_info, - uint8_t *user_sgpr_idx) -{ - uint32_t mask = args->shader_info->desc_set_used_mask; - - if (!user_sgpr_info->indirect_all_descriptor_sets) { - while (mask) { - int i = u_bit_scan(&mask); - - set_loc_desc(args, i, user_sgpr_idx); - } - } else { - set_loc_shader_ptr(args, AC_UD_INDIRECT_DESCRIPTOR_SETS, - user_sgpr_idx); - - args->shader_info->need_indirect_descriptor_sets = true; - } - - if (args->shader_info->loads_push_constants) { - set_loc_shader_ptr(args, AC_UD_PUSH_CONSTANTS, user_sgpr_idx); - } - - if (args->shader_info->num_inline_push_consts) { - set_loc_shader(args, AC_UD_INLINE_PUSH_CONSTANTS, user_sgpr_idx, - args->shader_info->num_inline_push_consts); - } - - if (args->streamout_buffers.used) { - set_loc_shader_ptr(args, AC_UD_STREAMOUT_BUFFERS, - user_sgpr_idx); - } -} - static void load_descriptor_sets(struct radv_shader_context *ctx) { @@ -741,30 +362,6 @@ load_descriptor_sets(struct radv_shader_context *ctx) } } - -static void -set_vs_specific_input_locs(struct radv_shader_args *args, - gl_shader_stage stage, bool has_previous_stage, - gl_shader_stage previous_stage, - uint8_t *user_sgpr_idx) -{ - if (!args->is_gs_copy_shader && - (stage == MESA_SHADER_VERTEX || - (has_previous_stage && previous_stage == MESA_SHADER_VERTEX))) { - if (args->shader_info->vs.has_vertex_buffers) { - set_loc_shader_ptr(args, AC_UD_VS_VERTEX_BUFFERS, - user_sgpr_idx); - } - - unsigned vs_num = 2; - if (args->shader_info->vs.needs_draw_id) - vs_num++; - - set_loc_shader(args, AC_UD_VS_BASE_VERTEX_START_INSTANCE, - user_sgpr_idx, vs_num); - } -} - static enum ac_llvm_calling_convention get_llvm_calling_convention(LLVMValueRef func, gl_shader_stage stage) { @@ -796,313 +393,6 @@ static bool is_pre_gs_stage(gl_shader_stage stage) return stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_TESS_EVAL; } -static void declare_inputs(struct radv_shader_args *args, - gl_shader_stage stage, - bool has_previous_stage, - gl_shader_stage previous_stage) -{ - struct user_sgpr_info user_sgpr_info; - bool needs_view_index = needs_view_index_sgpr(args, stage); - - if (args->options->chip_class >= GFX10) { - if (is_pre_gs_stage(stage) && args->options->key.vs_common_out.as_ngg) { - /* On GFX10, VS is merged into GS for NGG. */ - previous_stage = stage; - stage = MESA_SHADER_GEOMETRY; - has_previous_stage = true; - } - } - - for (int i = 0; i < MAX_SETS; i++) - args->shader_info->user_sgprs_locs.descriptor_sets[i].sgpr_idx = -1; - for (int i = 0; i < AC_UD_MAX_UD; i++) - args->shader_info->user_sgprs_locs.shader_data[i].sgpr_idx = -1; - - - allocate_user_sgprs(args, stage, has_previous_stage, - previous_stage, needs_view_index, &user_sgpr_info); - - if (user_sgpr_info.need_ring_offsets && !args->options->supports_spill) { - ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_DESC_PTR, - &args->ring_offsets); - } - - switch (stage) { - case MESA_SHADER_COMPUTE: - declare_global_input_sgprs(args, &user_sgpr_info); - - if (args->shader_info->cs.uses_grid_size) { - ac_add_arg(&args->ac, AC_ARG_SGPR, 3, AC_ARG_INT, - &args->ac.num_work_groups); - } - - for (int i = 0; i < 3; i++) { - if (args->shader_info->cs.uses_block_id[i]) { - ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, - &args->ac.workgroup_ids[i]); - } - } - - if (args->shader_info->cs.uses_local_invocation_idx) { - ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, - &args->ac.tg_size); - } - - ac_add_arg(&args->ac, AC_ARG_VGPR, 3, AC_ARG_INT, - &args->ac.local_invocation_ids); - break; - case MESA_SHADER_VERTEX: - declare_global_input_sgprs(args, &user_sgpr_info); - - declare_vs_specific_input_sgprs(args, stage, has_previous_stage, - previous_stage); - - if (needs_view_index) { - ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, - &args->ac.view_index); - } - - if (args->options->key.vs_common_out.as_es) { - ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, - &args->es2gs_offset); - } else if (args->options->key.vs_common_out.as_ls) { - /* no extra parameters */ - } else { - declare_streamout_sgprs(args, stage); - } - - declare_vs_input_vgprs(args); - break; - case MESA_SHADER_TESS_CTRL: - if (has_previous_stage) { - // First 6 system regs - ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->oc_lds); - ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, - &args->merged_wave_info); - ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, - &args->tess_factor_offset); - - ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // scratch offset - ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown - ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown - - declare_global_input_sgprs(args, &user_sgpr_info); - - declare_vs_specific_input_sgprs(args, stage, - has_previous_stage, - previous_stage); - - if (needs_view_index) { - ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, - &args->ac.view_index); - } - - ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, - &args->ac.tcs_patch_id); - ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, - &args->ac.tcs_rel_ids); - - declare_vs_input_vgprs(args); - } else { - declare_global_input_sgprs(args, &user_sgpr_info); - - if (needs_view_index) { - ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, - &args->ac.view_index); - } - - ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->oc_lds); - ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, - &args->tess_factor_offset); - ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, - &args->ac.tcs_patch_id); - ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, - &args->ac.tcs_rel_ids); - } - break; - case MESA_SHADER_TESS_EVAL: - declare_global_input_sgprs(args, &user_sgpr_info); - - if (needs_view_index) - ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, - &args->ac.view_index); - - if (args->options->key.vs_common_out.as_es) { - ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->oc_lds); - ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); - ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, - &args->es2gs_offset); - } else { - declare_streamout_sgprs(args, stage); - ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->oc_lds); - } - declare_tes_input_vgprs(args); - break; - case MESA_SHADER_GEOMETRY: - if (has_previous_stage) { - // First 6 system regs - if (args->options->key.vs_common_out.as_ngg) { - ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, - &args->gs_tg_info); - } else { - ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, - &args->gs2vs_offset); - } - - ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, - &args->merged_wave_info); - ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->oc_lds); - - ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // scratch offset - ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown - ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown - - declare_global_input_sgprs(args, &user_sgpr_info); - - if (previous_stage != MESA_SHADER_TESS_EVAL) { - declare_vs_specific_input_sgprs(args, stage, - has_previous_stage, - previous_stage); - } - - if (needs_view_index) { - ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, - &args->ac.view_index); - } - - ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, - &args->gs_vtx_offset[0]); - ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, - &args->gs_vtx_offset[2]); - ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, - &args->ac.gs_prim_id); - ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, - &args->ac.gs_invocation_id); - ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, - &args->gs_vtx_offset[4]); - - if (previous_stage == MESA_SHADER_VERTEX) { - declare_vs_input_vgprs(args); - } else { - declare_tes_input_vgprs(args); - } - } else { - declare_global_input_sgprs(args, &user_sgpr_info); - - if (needs_view_index) { - ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, - &args->ac.view_index); - } - - ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->gs2vs_offset); - ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->gs_wave_id); - ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, - &args->gs_vtx_offset[0]); - ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, - &args->gs_vtx_offset[1]); - ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, - &args->ac.gs_prim_id); - ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, - &args->gs_vtx_offset[2]); - ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, - &args->gs_vtx_offset[3]); - ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, - &args->gs_vtx_offset[4]); - ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, - &args->gs_vtx_offset[5]); - ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, - &args->ac.gs_invocation_id); - } - break; - case MESA_SHADER_FRAGMENT: - declare_global_input_sgprs(args, &user_sgpr_info); - - ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.prim_mask); - ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_sample); - ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_center); - ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_centroid); - ac_add_arg(&args->ac, AC_ARG_VGPR, 3, AC_ARG_INT, NULL); /* persp pull model */ - ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.linear_sample); - ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.linear_center); - ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.linear_centroid); - ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, NULL); /* line stipple tex */ - ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[0]); - ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[1]); - ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[2]); - ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[3]); - ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.front_face); - ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.ancillary); - ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.sample_coverage); - ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* fixed pt */ - break; - default: - unreachable("Shader stage not implemented"); - } - - args->shader_info->num_input_vgprs = 0; - args->shader_info->num_input_sgprs = args->options->supports_spill ? 2 : 0; - args->shader_info->num_input_sgprs += args->ac.num_sgprs_used; - - if (stage != MESA_SHADER_FRAGMENT) - args->shader_info->num_input_vgprs = args->ac.num_vgprs_used; - - uint8_t user_sgpr_idx = 0; - - if (args->options->supports_spill || user_sgpr_info.need_ring_offsets) { - set_loc_shader_ptr(args, AC_UD_SCRATCH_RING_OFFSETS, - &user_sgpr_idx); - } - - /* For merged shaders the user SGPRs start at 8, with 8 system SGPRs in front (including - * the rw_buffers at s0/s1. With user SGPR0 = s8, lets restart the count from 0 */ - if (has_previous_stage) - user_sgpr_idx = 0; - - set_global_input_locs(args, &user_sgpr_info, &user_sgpr_idx); - - switch (stage) { - case MESA_SHADER_COMPUTE: - if (args->shader_info->cs.uses_grid_size) { - set_loc_shader(args, AC_UD_CS_GRID_SIZE, - &user_sgpr_idx, 3); - } - break; - case MESA_SHADER_VERTEX: - set_vs_specific_input_locs(args, stage, has_previous_stage, - previous_stage, &user_sgpr_idx); - if (args->ac.view_index.used) - set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1); - break; - case MESA_SHADER_TESS_CTRL: - set_vs_specific_input_locs(args, stage, has_previous_stage, - previous_stage, &user_sgpr_idx); - if (args->ac.view_index.used) - set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1); - break; - case MESA_SHADER_TESS_EVAL: - if (args->ac.view_index.used) - set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1); - break; - case MESA_SHADER_GEOMETRY: - if (has_previous_stage) { - if (previous_stage == MESA_SHADER_VERTEX) - set_vs_specific_input_locs(args, stage, - has_previous_stage, - previous_stage, - &user_sgpr_idx); - } - if (args->ac.view_index.used) - set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1); - break; - case MESA_SHADER_FRAGMENT: - break; - default: - unreachable("Shader stage not implemented"); - } - - args->shader_info->num_user_sgprs = user_sgpr_idx; -} - static void create_function(struct radv_shader_context *ctx, gl_shader_stage stage, bool has_previous_stage) @@ -4764,39 +4054,33 @@ static LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm, struct nir_shader *const *shaders, int shader_count, - struct radv_shader_info *shader_info, - const struct radv_nir_compiler_options *options) + const struct radv_shader_args *args) { struct radv_shader_context ctx = {0}; - struct radv_shader_args args = {0}; - args.options = options; - args.shader_info = shader_info; - ctx.args = &args; - - declare_inputs(&args, shaders[shader_count - 1]->info.stage, shader_count >= 2, - shader_count >= 2 ? shaders[shader_count - 2]->info.stage : MESA_SHADER_VERTEX); + ctx.args = args; enum ac_float_mode float_mode = AC_FLOAT_MODE_DEFAULT; - if (shader_info->float_controls_mode & FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP32) { + if (args->shader_info->float_controls_mode & FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP32) { float_mode = AC_FLOAT_MODE_DENORM_FLUSH_TO_ZERO; } - ac_llvm_context_init(&ctx.ac, ac_llvm, options->chip_class, - options->family, float_mode, shader_info->wave_size, 64); + ac_llvm_context_init(&ctx.ac, ac_llvm, args->options->chip_class, + args->options->family, float_mode, + args->shader_info->wave_size, 64); ctx.context = ctx.ac.context; ctx.max_workgroup_size = 0; for (int i = 0; i < shader_count; ++i) { ctx.max_workgroup_size = MAX2(ctx.max_workgroup_size, - radv_nir_get_max_workgroup_size(args.options->chip_class, + radv_nir_get_max_workgroup_size(args->options->chip_class, shaders[i]->info.stage, shaders[i])); } if (ctx.ac.chip_class >= GFX10) { if (is_pre_gs_stage(shaders[0]->info.stage) && - options->key.vs_common_out.as_ngg) { + args->options->key.vs_common_out.as_ngg) { ctx.max_workgroup_size = 128; } } @@ -4811,20 +4095,20 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm, ctx.abi.load_sampler_desc = radv_get_sampler_desc; ctx.abi.load_resource = radv_load_resource; ctx.abi.clamp_shadow_reference = false; - ctx.abi.robust_buffer_access = options->robust_buffer_access; + ctx.abi.robust_buffer_access = args->options->robust_buffer_access; - bool is_ngg = is_pre_gs_stage(shaders[0]->info.stage) && args.options->key.vs_common_out.as_ngg; + bool is_ngg = is_pre_gs_stage(shaders[0]->info.stage) && args->options->key.vs_common_out.as_ngg; if (shader_count >= 2 || is_ngg) ac_init_exec_full_mask(&ctx.ac); - if (args.ac.vertex_id.used) - ctx.abi.vertex_id = ac_get_arg(&ctx.ac, args.ac.vertex_id); - if (args.rel_auto_id.used) - ctx.rel_auto_id = ac_get_arg(&ctx.ac, args.rel_auto_id); - if (args.ac.instance_id.used) - ctx.abi.instance_id = ac_get_arg(&ctx.ac, args.ac.instance_id); + if (args->ac.vertex_id.used) + ctx.abi.vertex_id = ac_get_arg(&ctx.ac, args->ac.vertex_id); + if (args->rel_auto_id.used) + ctx.rel_auto_id = ac_get_arg(&ctx.ac, args->rel_auto_id); + if (args->ac.instance_id.used) + ctx.abi.instance_id = ac_get_arg(&ctx.ac, args->ac.instance_id); - if (options->has_ls_vgpr_init_bug && + if (args->options->has_ls_vgpr_init_bug && shaders[shader_count - 1]->info.stage == MESA_SHADER_TESS_CTRL) ac_nir_fixup_ls_hs_input_vgprs(&ctx); @@ -4858,7 +4142,7 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm, ctx.gs_next_vertex[i] = ac_build_alloca(&ctx.ac, ctx.ac.i32, ""); } - if (args.options->key.vs_common_out.as_ngg) { + if (args->options->key.vs_common_out.as_ngg) { for (unsigned i = 0; i < 4; ++i) { ctx.gs_curprim_verts[i] = ac_build_alloca(&ctx.ac, ctx.ac.i32, ""); @@ -4867,7 +4151,7 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm, } unsigned scratch_size = 8; - if (args.shader_info->so.num_outputs) + if (args->shader_info->so.num_outputs) scratch_size = 44; LLVMTypeRef ai32 = LLVMArrayType(ctx.ac.i32, scratch_size); @@ -4890,15 +4174,15 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm, ctx.abi.load_patch_vertices_in = load_patch_vertices_in; ctx.abi.store_tcs_outputs = store_tcs_output; if (shader_count == 1) - ctx.tcs_num_inputs = args.options->key.tcs.num_inputs; + ctx.tcs_num_inputs = args->options->key.tcs.num_inputs; else - ctx.tcs_num_inputs = util_last_bit64(shader_info->vs.ls_outputs_written); + ctx.tcs_num_inputs = util_last_bit64(args->shader_info->vs.ls_outputs_written); ctx.tcs_num_patches = get_tcs_num_patches(&ctx); } else if (shaders[i]->info.stage == MESA_SHADER_TESS_EVAL) { ctx.abi.load_tess_varyings = load_tes_input; ctx.abi.load_tess_coord = load_tess_coord; ctx.abi.load_patch_vertices_in = load_patch_vertices_in; - ctx.tcs_num_patches = args.options->key.tes.num_patches; + ctx.tcs_num_patches = args->options->key.tes.num_patches; } else if (shaders[i]->info.stage == MESA_SHADER_VERTEX) { ctx.abi.load_base_vertex = radv_load_base_vertex; } else if (shaders[i]->info.stage == MESA_SHADER_FRAGMENT) { @@ -4908,8 +4192,8 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm, } if (shaders[i]->info.stage == MESA_SHADER_VERTEX && - args.options->key.vs_common_out.as_ngg && - args.options->key.vs_common_out.export_prim_id) { + args->options->key.vs_common_out.as_ngg && + args->options->key.vs_common_out.export_prim_id) { declare_esgs_ring(&ctx); } @@ -4917,7 +4201,7 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm, if (i) { if (shaders[i]->info.stage == MESA_SHADER_GEOMETRY && - args.options->key.vs_common_out.as_ngg) { + args->options->key.vs_common_out.as_ngg) { gfx10_ngg_gs_emit_prologue(&ctx); nested_barrier = false; } else { @@ -4959,7 +4243,7 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm, LLVMValueRef count = ac_unpack_param(&ctx.ac, - ac_get_arg(&ctx.ac, args.merged_wave_info), + ac_get_arg(&ctx.ac, args->merged_wave_info), 8 * i, 8); LLVMValueRef thread_id = ac_get_thread_id(&ctx.ac); LLVMValueRef cond = LLVMBuildICmp(ctx.ac.builder, LLVMIntULT, @@ -4976,7 +4260,7 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm, else if(shaders[i]->info.stage == MESA_SHADER_GEOMETRY) prepare_gs_input_vgprs(&ctx, shader_count >= 2); - ac_nir_translate(&ctx.ac, &ctx.abi, &args.ac, shaders[i]); + ac_nir_translate(&ctx.ac, &ctx.abi, &args->ac, shaders[i]); if (shader_count >= 2 || is_ngg) { LLVMBuildBr(ctx.ac.builder, merge_block); @@ -4986,37 +4270,37 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm, /* This needs to be outside the if wrapping the shader body, as sometimes * the HW generates waves with 0 es/vs threads. */ if (is_pre_gs_stage(shaders[i]->info.stage) && - args.options->key.vs_common_out.as_ngg && + args->options->key.vs_common_out.as_ngg && i == shader_count - 1) { handle_ngg_outputs_post_2(&ctx); } else if (shaders[i]->info.stage == MESA_SHADER_GEOMETRY && - args.options->key.vs_common_out.as_ngg) { + args->options->key.vs_common_out.as_ngg) { gfx10_ngg_gs_emit_epilogue_2(&ctx); } if (shaders[i]->info.stage == MESA_SHADER_TESS_CTRL) { - shader_info->tcs.num_patches = ctx.tcs_num_patches; - shader_info->tcs.lds_size = calculate_tess_lds_size(&ctx); + args->shader_info->tcs.num_patches = ctx.tcs_num_patches; + args->shader_info->tcs.lds_size = calculate_tess_lds_size(&ctx); } } LLVMBuildRetVoid(ctx.ac.builder); - if (options->dump_preoptir) { + if (args->options->dump_preoptir) { fprintf(stderr, "%s LLVM IR:\n\n", - radv_get_shader_name(shader_info, + radv_get_shader_name(args->shader_info, shaders[shader_count - 1]->info.stage)); ac_dump_module(ctx.ac.module); fprintf(stderr, "\n"); } - ac_llvm_finalize_module(&ctx, ac_llvm->passmgr, options); + ac_llvm_finalize_module(&ctx, ac_llvm->passmgr, args->options); if (shader_count == 1) ac_nir_eliminate_const_vs_outputs(&ctx); - if (options->dump_shader) { - args.shader_info->private_mem_vgprs = + if (args->options->dump_shader) { + args->shader_info->private_mem_vgprs = ac_count_scratch_private_memory(ctx.main_function); } @@ -5110,28 +4394,26 @@ static void ac_compile_llvm_module(struct ac_llvm_compiler *ac_llvm, void radv_compile_nir_shader(struct ac_llvm_compiler *ac_llvm, struct radv_shader_binary **rbinary, - struct radv_shader_info *shader_info, + const struct radv_shader_args *args, struct nir_shader *const *nir, - int nir_count, - const struct radv_nir_compiler_options *options) + int nir_count) { LLVMModuleRef llvm_module; - llvm_module = ac_translate_nir_to_llvm(ac_llvm, nir, nir_count, shader_info, - options); + llvm_module = ac_translate_nir_to_llvm(ac_llvm, nir, nir_count, args); ac_compile_llvm_module(ac_llvm, llvm_module, rbinary, nir[nir_count - 1]->info.stage, - radv_get_shader_name(shader_info, + radv_get_shader_name(args->shader_info, nir[nir_count - 1]->info.stage), - options); + args->options); /* Determine the ES type (VS or TES) for the GS on GFX9. */ - if (options->chip_class >= GFX9) { + if (args->options->chip_class >= GFX9) { if (nir_count == 2 && nir[1]->info.stage == MESA_SHADER_GEOMETRY) { - shader_info->gs.es_type = nir[0]->info.stage; + args->shader_info->gs.es_type = nir[0]->info.stage; } } } @@ -5239,20 +4521,15 @@ void radv_compile_gs_copy_shader(struct ac_llvm_compiler *ac_llvm, struct nir_shader *geom_shader, struct radv_shader_binary **rbinary, - struct radv_shader_info *shader_info, - const struct radv_nir_compiler_options *options) + const struct radv_shader_args *args) { struct radv_shader_context ctx = {0}; - struct radv_shader_args args = {0}; - args.options = options; - args.shader_info = shader_info; - ctx.args = &args; + ctx.args = args; - args.is_gs_copy_shader = true; - declare_inputs(&args, MESA_SHADER_VERTEX, false, MESA_SHADER_VERTEX); + assert(args->is_gs_copy_shader); - ac_llvm_context_init(&ctx.ac, ac_llvm, options->chip_class, - options->family, AC_FLOAT_MODE_DEFAULT, 64, 64); + ac_llvm_context_init(&ctx.ac, ac_llvm, args->options->chip_class, + args->options->family, AC_FLOAT_MODE_DEFAULT, 64, 64); ctx.context = ctx.ac.context; ctx.stage = MESA_SHADER_VERTEX; @@ -5272,10 +4549,10 @@ radv_compile_gs_copy_shader(struct ac_llvm_compiler *ac_llvm, LLVMBuildRetVoid(ctx.ac.builder); - ac_llvm_finalize_module(&ctx, ac_llvm->passmgr, options); + ac_llvm_finalize_module(&ctx, ac_llvm->passmgr, args->options); ac_compile_llvm_module(ac_llvm, ctx.ac.module, rbinary, - MESA_SHADER_VERTEX, "GS Copy Shader", options); + MESA_SHADER_VERTEX, "GS Copy Shader", args->options); (*rbinary)->is_gs_copy_shader = true; } diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index 8066505e245..5d63f4145f2 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -2298,21 +2298,18 @@ struct radv_fence { }; /* radv_nir_to_llvm.c */ -struct radv_shader_info; -struct radv_nir_compiler_options; +struct radv_shader_args; void radv_compile_gs_copy_shader(struct ac_llvm_compiler *ac_llvm, struct nir_shader *geom_shader, struct radv_shader_binary **rbinary, - struct radv_shader_info *info, - const struct radv_nir_compiler_options *option); + const struct radv_shader_args *args); void radv_compile_nir_shader(struct ac_llvm_compiler *ac_llvm, struct radv_shader_binary **rbinary, - struct radv_shader_info *info, + const struct radv_shader_args *args, struct nir_shader *const *nir, - int nir_count, - const struct radv_nir_compiler_options *options); + int nir_count); unsigned radv_nir_get_max_workgroup_size(enum chip_class chip_class, gl_shader_stage stage, diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index 1e550526f56..8ba83ff77c7 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -31,6 +31,7 @@ #include "radv_private.h" #include "radv_shader.h" #include "radv_shader_helper.h" +#include "radv_shader_args.h" #include "nir/nir.h" #include "nir/nir_builder.h" #include "spirv/nir_spirv.h" @@ -1095,6 +1096,17 @@ shader_variant_compile(struct radv_device *device, options->has_ls_vgpr_init_bug = device->physical_device->rad_info.has_ls_vgpr_init_bug; options->use_ngg_streamout = device->physical_device->use_ngg_streamout; + struct radv_shader_args args = {}; + args.options = options; + args.shader_info = info; + args.is_gs_copy_shader = gs_copy_shader; + radv_declare_shader_args(&args, + gs_copy_shader ? MESA_SHADER_VERTEX + : shaders[shader_count - 1]->info.stage, + shader_count >= 2, + shader_count >= 2 ? shaders[shader_count - 2]->info.stage + : MESA_SHADER_VERTEX); + if (!use_aco || options->dump_shader || options->record_ir) ac_init_llvm_once(); @@ -1124,10 +1136,10 @@ shader_variant_compile(struct radv_device *device, if (gs_copy_shader) { assert(shader_count == 1); radv_compile_gs_copy_shader(&ac_llvm, *shaders, &binary, - info, options); + &args); } else { - radv_compile_nir_shader(&ac_llvm, &binary, info, - shaders, shader_count, options); + radv_compile_nir_shader(&ac_llvm, &binary, &args, + shaders, shader_count); } binary->info = *info; diff --git a/src/amd/vulkan/radv_shader_args.c b/src/amd/vulkan/radv_shader_args.c new file mode 100644 index 00000000000..949b91dcf94 --- /dev/null +++ b/src/amd/vulkan/radv_shader_args.c @@ -0,0 +1,747 @@ +/* + * Copyright © 2019 Valve Corporation. + * Copyright © 2016 Red Hat. + * Copyright © 2016 Bas Nieuwenhuizen + * + * based in part on anv driver which is: + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "radv_private.h" +#include "radv_shader.h" +#include "radv_shader_args.h" + +static void +set_loc(struct radv_userdata_info *ud_info, uint8_t *sgpr_idx, + uint8_t num_sgprs) +{ + ud_info->sgpr_idx = *sgpr_idx; + ud_info->num_sgprs = num_sgprs; + *sgpr_idx += num_sgprs; +} + +static void +set_loc_shader(struct radv_shader_args *args, int idx, uint8_t *sgpr_idx, + uint8_t num_sgprs) +{ + struct radv_userdata_info *ud_info = + &args->shader_info->user_sgprs_locs.shader_data[idx]; + assert(ud_info); + + set_loc(ud_info, sgpr_idx, num_sgprs); +} + +static void +set_loc_shader_ptr(struct radv_shader_args *args, int idx, uint8_t *sgpr_idx) +{ + bool use_32bit_pointers = idx != AC_UD_SCRATCH_RING_OFFSETS; + + set_loc_shader(args, idx, sgpr_idx, use_32bit_pointers ? 1 : 2); +} + +static void +set_loc_desc(struct radv_shader_args *args, int idx, uint8_t *sgpr_idx) +{ + struct radv_userdata_locations *locs = + &args->shader_info->user_sgprs_locs; + struct radv_userdata_info *ud_info = &locs->descriptor_sets[idx]; + assert(ud_info); + + set_loc(ud_info, sgpr_idx, 1); + + locs->descriptor_sets_enabled |= 1 << idx; +} + +struct user_sgpr_info { + bool need_ring_offsets; + bool indirect_all_descriptor_sets; + uint8_t remaining_sgprs; +}; + +static bool needs_view_index_sgpr(struct radv_shader_args *args, + gl_shader_stage stage) +{ + switch (stage) { + case MESA_SHADER_VERTEX: + if (args->shader_info->needs_multiview_view_index || + (!args->options->key.vs_common_out.as_es && !args->options->key.vs_common_out.as_ls && args->options->key.has_multiview_view_index)) + return true; + break; + case MESA_SHADER_TESS_EVAL: + if (args->shader_info->needs_multiview_view_index || (!args->options->key.vs_common_out.as_es && args->options->key.has_multiview_view_index)) + return true; + break; + case MESA_SHADER_GEOMETRY: + case MESA_SHADER_TESS_CTRL: + if (args->shader_info->needs_multiview_view_index) + return true; + break; + default: + break; + } + return false; +} + +static uint8_t +count_vs_user_sgprs(struct radv_shader_args *args) +{ + uint8_t count = 0; + + if (args->shader_info->vs.has_vertex_buffers) + count++; + count += args->shader_info->vs.needs_draw_id ? 3 : 2; + + return count; +} + +static void allocate_inline_push_consts(struct radv_shader_args *args, + struct user_sgpr_info *user_sgpr_info) +{ + uint8_t remaining_sgprs = user_sgpr_info->remaining_sgprs; + + /* Only supported if shaders use push constants. */ + if (args->shader_info->min_push_constant_used == UINT8_MAX) + return; + + /* Only supported if shaders don't have indirect push constants. */ + if (args->shader_info->has_indirect_push_constants) + return; + + /* Only supported for 32-bit push constants. */ + if (!args->shader_info->has_only_32bit_push_constants) + return; + + uint8_t num_push_consts = + (args->shader_info->max_push_constant_used - + args->shader_info->min_push_constant_used) / 4; + + /* Check if the number of user SGPRs is large enough. */ + if (num_push_consts < remaining_sgprs) { + args->shader_info->num_inline_push_consts = num_push_consts; + } else { + args->shader_info->num_inline_push_consts = remaining_sgprs; + } + + /* Clamp to the maximum number of allowed inlined push constants. */ + if (args->shader_info->num_inline_push_consts > AC_MAX_INLINE_PUSH_CONSTS) + args->shader_info->num_inline_push_consts = AC_MAX_INLINE_PUSH_CONSTS; + + if (args->shader_info->num_inline_push_consts == num_push_consts && + !args->shader_info->loads_dynamic_offsets) { + /* Disable the default push constants path if all constants are + * inlined and if shaders don't use dynamic descriptors. + */ + args->shader_info->loads_push_constants = false; + } + + args->shader_info->base_inline_push_consts = + args->shader_info->min_push_constant_used / 4; +} + +static void allocate_user_sgprs(struct radv_shader_args *args, + gl_shader_stage stage, + bool has_previous_stage, + gl_shader_stage previous_stage, + bool needs_view_index, + struct user_sgpr_info *user_sgpr_info) +{ + uint8_t user_sgpr_count = 0; + + memset(user_sgpr_info, 0, sizeof(struct user_sgpr_info)); + + /* until we sort out scratch/global buffers always assign ring offsets for gs/vs/es */ + if (stage == MESA_SHADER_GEOMETRY || + stage == MESA_SHADER_VERTEX || + stage == MESA_SHADER_TESS_CTRL || + stage == MESA_SHADER_TESS_EVAL || + args->is_gs_copy_shader) + user_sgpr_info->need_ring_offsets = true; + + if (stage == MESA_SHADER_FRAGMENT && + args->shader_info->ps.needs_sample_positions) + user_sgpr_info->need_ring_offsets = true; + + /* 2 user sgprs will nearly always be allocated for scratch/rings */ + if (args->options->supports_spill || user_sgpr_info->need_ring_offsets) { + user_sgpr_count += 2; + } + + switch (stage) { + case MESA_SHADER_COMPUTE: + if (args->shader_info->cs.uses_grid_size) + user_sgpr_count += 3; + break; + case MESA_SHADER_FRAGMENT: + user_sgpr_count += args->shader_info->ps.needs_sample_positions; + break; + case MESA_SHADER_VERTEX: + if (!args->is_gs_copy_shader) + user_sgpr_count += count_vs_user_sgprs(args); + break; + case MESA_SHADER_TESS_CTRL: + if (has_previous_stage) { + if (previous_stage == MESA_SHADER_VERTEX) + user_sgpr_count += count_vs_user_sgprs(args); + } + break; + case MESA_SHADER_TESS_EVAL: + break; + case MESA_SHADER_GEOMETRY: + if (has_previous_stage) { + if (previous_stage == MESA_SHADER_VERTEX) { + user_sgpr_count += count_vs_user_sgprs(args); + } + } + break; + default: + break; + } + + if (needs_view_index) + user_sgpr_count++; + + if (args->shader_info->loads_push_constants) + user_sgpr_count++; + + if (args->shader_info->so.num_outputs) + user_sgpr_count++; + + uint32_t available_sgprs = args->options->chip_class >= GFX9 && stage != MESA_SHADER_COMPUTE ? 32 : 16; + uint32_t remaining_sgprs = available_sgprs - user_sgpr_count; + uint32_t num_desc_set = + util_bitcount(args->shader_info->desc_set_used_mask); + + if (remaining_sgprs < num_desc_set) { + user_sgpr_info->indirect_all_descriptor_sets = true; + user_sgpr_info->remaining_sgprs = remaining_sgprs - 1; + } else { + user_sgpr_info->remaining_sgprs = remaining_sgprs - num_desc_set; + } + + allocate_inline_push_consts(args, user_sgpr_info); +} + +static void +declare_global_input_sgprs(struct radv_shader_args *args, + const struct user_sgpr_info *user_sgpr_info) +{ + /* 1 for each descriptor set */ + if (!user_sgpr_info->indirect_all_descriptor_sets) { + uint32_t mask = args->shader_info->desc_set_used_mask; + + while (mask) { + int i = u_bit_scan(&mask); + + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_PTR, + &args->descriptor_sets[i]); + } + } else { + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_PTR_PTR, + &args->descriptor_sets[0]); + } + + if (args->shader_info->loads_push_constants) { + /* 1 for push constants and dynamic descriptors */ + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_PTR, + &args->ac.push_constants); + } + + for (unsigned i = 0; i < args->shader_info->num_inline_push_consts; i++) { + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, + &args->ac.inline_push_consts[i]); + } + args->ac.num_inline_push_consts = args->shader_info->num_inline_push_consts; + args->ac.base_inline_push_consts = args->shader_info->base_inline_push_consts; + + if (args->shader_info->so.num_outputs) { + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_DESC_PTR, + &args->streamout_buffers); + } +} + +static void +declare_vs_specific_input_sgprs(struct radv_shader_args *args, + gl_shader_stage stage, + bool has_previous_stage, + gl_shader_stage previous_stage) +{ + if (!args->is_gs_copy_shader && + (stage == MESA_SHADER_VERTEX || + (has_previous_stage && previous_stage == MESA_SHADER_VERTEX))) { + if (args->shader_info->vs.has_vertex_buffers) { + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_DESC_PTR, + &args->vertex_buffers); + } + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.base_vertex); + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.start_instance); + if (args->shader_info->vs.needs_draw_id) { + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.draw_id); + } + } +} + +static void +declare_vs_input_vgprs(struct radv_shader_args *args) +{ + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vertex_id); + if (!args->is_gs_copy_shader) { + if (args->options->key.vs_common_out.as_ls) { + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->rel_auto_id); + if (args->options->chip_class >= GFX10) { + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */ + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id); + } else { + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id); + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* unused */ + } + } else { + if (args->options->chip_class >= GFX10) { + if (args->options->key.vs_common_out.as_ngg) { + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */ + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */ + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id); + } else { + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* unused */ + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->vs_prim_id); + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id); + } + } else { + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id); + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->vs_prim_id); + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* unused */ + } + } + } +} + +static void +declare_streamout_sgprs(struct radv_shader_args *args, gl_shader_stage stage) +{ + int i; + + if (args->options->use_ngg_streamout) { + if (stage == MESA_SHADER_TESS_EVAL) + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); + return; + } + + /* Streamout SGPRs. */ + if (args->shader_info->so.num_outputs) { + assert(stage == MESA_SHADER_VERTEX || + stage == MESA_SHADER_TESS_EVAL); + + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->streamout_config); + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->streamout_write_idx); + } else if (stage == MESA_SHADER_TESS_EVAL) { + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); + } + + /* A streamout buffer offset is loaded if the stride is non-zero. */ + for (i = 0; i < 4; i++) { + if (!args->shader_info->so.strides[i]) + continue; + + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->streamout_offset[i]); + } +} + +static void +declare_tes_input_vgprs(struct radv_shader_args *args) +{ + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->tes_u); + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->tes_v); + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->tes_rel_patch_id); + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tes_patch_id); +} + +static void +set_global_input_locs(struct radv_shader_args *args, + const struct user_sgpr_info *user_sgpr_info, + uint8_t *user_sgpr_idx) +{ + uint32_t mask = args->shader_info->desc_set_used_mask; + + if (!user_sgpr_info->indirect_all_descriptor_sets) { + while (mask) { + int i = u_bit_scan(&mask); + + set_loc_desc(args, i, user_sgpr_idx); + } + } else { + set_loc_shader_ptr(args, AC_UD_INDIRECT_DESCRIPTOR_SETS, + user_sgpr_idx); + + args->shader_info->need_indirect_descriptor_sets = true; + } + + if (args->shader_info->loads_push_constants) { + set_loc_shader_ptr(args, AC_UD_PUSH_CONSTANTS, user_sgpr_idx); + } + + if (args->shader_info->num_inline_push_consts) { + set_loc_shader(args, AC_UD_INLINE_PUSH_CONSTANTS, user_sgpr_idx, + args->shader_info->num_inline_push_consts); + } + + if (args->streamout_buffers.used) { + set_loc_shader_ptr(args, AC_UD_STREAMOUT_BUFFERS, + user_sgpr_idx); + } +} + +static void +set_vs_specific_input_locs(struct radv_shader_args *args, + gl_shader_stage stage, bool has_previous_stage, + gl_shader_stage previous_stage, + uint8_t *user_sgpr_idx) +{ + if (!args->is_gs_copy_shader && + (stage == MESA_SHADER_VERTEX || + (has_previous_stage && previous_stage == MESA_SHADER_VERTEX))) { + if (args->shader_info->vs.has_vertex_buffers) { + set_loc_shader_ptr(args, AC_UD_VS_VERTEX_BUFFERS, + user_sgpr_idx); + } + + unsigned vs_num = 2; + if (args->shader_info->vs.needs_draw_id) + vs_num++; + + set_loc_shader(args, AC_UD_VS_BASE_VERTEX_START_INSTANCE, + user_sgpr_idx, vs_num); + } +} + +/* Returns whether the stage is a stage that can be directly before the GS */ +static bool is_pre_gs_stage(gl_shader_stage stage) +{ + return stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_TESS_EVAL; +} + +void +radv_declare_shader_args(struct radv_shader_args *args, + gl_shader_stage stage, + bool has_previous_stage, + gl_shader_stage previous_stage) +{ + struct user_sgpr_info user_sgpr_info; + bool needs_view_index = needs_view_index_sgpr(args, stage); + + if (args->options->chip_class >= GFX10) { + if (is_pre_gs_stage(stage) && args->options->key.vs_common_out.as_ngg) { + /* On GFX10, VS is merged into GS for NGG. */ + previous_stage = stage; + stage = MESA_SHADER_GEOMETRY; + has_previous_stage = true; + } + } + + for (int i = 0; i < MAX_SETS; i++) + args->shader_info->user_sgprs_locs.descriptor_sets[i].sgpr_idx = -1; + for (int i = 0; i < AC_UD_MAX_UD; i++) + args->shader_info->user_sgprs_locs.shader_data[i].sgpr_idx = -1; + + + allocate_user_sgprs(args, stage, has_previous_stage, + previous_stage, needs_view_index, &user_sgpr_info); + + if (user_sgpr_info.need_ring_offsets && !args->options->supports_spill) { + ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_DESC_PTR, + &args->ring_offsets); + } + + switch (stage) { + case MESA_SHADER_COMPUTE: + declare_global_input_sgprs(args, &user_sgpr_info); + + if (args->shader_info->cs.uses_grid_size) { + ac_add_arg(&args->ac, AC_ARG_SGPR, 3, AC_ARG_INT, + &args->ac.num_work_groups); + } + + for (int i = 0; i < 3; i++) { + if (args->shader_info->cs.uses_block_id[i]) { + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, + &args->ac.workgroup_ids[i]); + } + } + + if (args->shader_info->cs.uses_local_invocation_idx) { + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, + &args->ac.tg_size); + } + + ac_add_arg(&args->ac, AC_ARG_VGPR, 3, AC_ARG_INT, + &args->ac.local_invocation_ids); + break; + case MESA_SHADER_VERTEX: + declare_global_input_sgprs(args, &user_sgpr_info); + + declare_vs_specific_input_sgprs(args, stage, has_previous_stage, + previous_stage); + + if (needs_view_index) { + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, + &args->ac.view_index); + } + + if (args->options->key.vs_common_out.as_es) { + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, + &args->es2gs_offset); + } else if (args->options->key.vs_common_out.as_ls) { + /* no extra parameters */ + } else { + declare_streamout_sgprs(args, stage); + } + + declare_vs_input_vgprs(args); + break; + case MESA_SHADER_TESS_CTRL: + if (has_previous_stage) { + // First 6 system regs + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->oc_lds); + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, + &args->merged_wave_info); + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, + &args->tess_factor_offset); + + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // scratch offset + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown + + declare_global_input_sgprs(args, &user_sgpr_info); + + declare_vs_specific_input_sgprs(args, stage, + has_previous_stage, + previous_stage); + + if (needs_view_index) { + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, + &args->ac.view_index); + } + + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, + &args->ac.tcs_patch_id); + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, + &args->ac.tcs_rel_ids); + + declare_vs_input_vgprs(args); + } else { + declare_global_input_sgprs(args, &user_sgpr_info); + + if (needs_view_index) { + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, + &args->ac.view_index); + } + + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->oc_lds); + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, + &args->tess_factor_offset); + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, + &args->ac.tcs_patch_id); + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, + &args->ac.tcs_rel_ids); + } + break; + case MESA_SHADER_TESS_EVAL: + declare_global_input_sgprs(args, &user_sgpr_info); + + if (needs_view_index) + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, + &args->ac.view_index); + + if (args->options->key.vs_common_out.as_es) { + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->oc_lds); + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, + &args->es2gs_offset); + } else { + declare_streamout_sgprs(args, stage); + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->oc_lds); + } + declare_tes_input_vgprs(args); + break; + case MESA_SHADER_GEOMETRY: + if (has_previous_stage) { + // First 6 system regs + if (args->options->key.vs_common_out.as_ngg) { + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, + &args->gs_tg_info); + } else { + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, + &args->gs2vs_offset); + } + + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, + &args->merged_wave_info); + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->oc_lds); + + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // scratch offset + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown + + declare_global_input_sgprs(args, &user_sgpr_info); + + if (previous_stage != MESA_SHADER_TESS_EVAL) { + declare_vs_specific_input_sgprs(args, stage, + has_previous_stage, + previous_stage); + } + + if (needs_view_index) { + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, + &args->ac.view_index); + } + + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, + &args->gs_vtx_offset[0]); + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, + &args->gs_vtx_offset[2]); + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, + &args->ac.gs_prim_id); + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, + &args->ac.gs_invocation_id); + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, + &args->gs_vtx_offset[4]); + + if (previous_stage == MESA_SHADER_VERTEX) { + declare_vs_input_vgprs(args); + } else { + declare_tes_input_vgprs(args); + } + } else { + declare_global_input_sgprs(args, &user_sgpr_info); + + if (needs_view_index) { + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, + &args->ac.view_index); + } + + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->gs2vs_offset); + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->gs_wave_id); + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, + &args->gs_vtx_offset[0]); + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, + &args->gs_vtx_offset[1]); + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, + &args->ac.gs_prim_id); + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, + &args->gs_vtx_offset[2]); + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, + &args->gs_vtx_offset[3]); + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, + &args->gs_vtx_offset[4]); + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, + &args->gs_vtx_offset[5]); + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, + &args->ac.gs_invocation_id); + } + break; + case MESA_SHADER_FRAGMENT: + declare_global_input_sgprs(args, &user_sgpr_info); + + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.prim_mask); + ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_sample); + ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_center); + ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_centroid); + ac_add_arg(&args->ac, AC_ARG_VGPR, 3, AC_ARG_INT, NULL); /* persp pull model */ + ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.linear_sample); + ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.linear_center); + ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.linear_centroid); + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, NULL); /* line stipple tex */ + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[0]); + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[1]); + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[2]); + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[3]); + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.front_face); + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.ancillary); + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.sample_coverage); + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* fixed pt */ + break; + default: + unreachable("Shader stage not implemented"); + } + + args->shader_info->num_input_vgprs = 0; + args->shader_info->num_input_sgprs = args->options->supports_spill ? 2 : 0; + args->shader_info->num_input_sgprs += args->ac.num_sgprs_used; + + if (stage != MESA_SHADER_FRAGMENT) + args->shader_info->num_input_vgprs = args->ac.num_vgprs_used; + + uint8_t user_sgpr_idx = 0; + + if (args->options->supports_spill || user_sgpr_info.need_ring_offsets) { + set_loc_shader_ptr(args, AC_UD_SCRATCH_RING_OFFSETS, + &user_sgpr_idx); + } + + /* For merged shaders the user SGPRs start at 8, with 8 system SGPRs in front (including + * the rw_buffers at s0/s1. With user SGPR0 = s8, lets restart the count from 0 */ + if (has_previous_stage) + user_sgpr_idx = 0; + + set_global_input_locs(args, &user_sgpr_info, &user_sgpr_idx); + + switch (stage) { + case MESA_SHADER_COMPUTE: + if (args->shader_info->cs.uses_grid_size) { + set_loc_shader(args, AC_UD_CS_GRID_SIZE, + &user_sgpr_idx, 3); + } + break; + case MESA_SHADER_VERTEX: + set_vs_specific_input_locs(args, stage, has_previous_stage, + previous_stage, &user_sgpr_idx); + if (args->ac.view_index.used) + set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1); + break; + case MESA_SHADER_TESS_CTRL: + set_vs_specific_input_locs(args, stage, has_previous_stage, + previous_stage, &user_sgpr_idx); + if (args->ac.view_index.used) + set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1); + break; + case MESA_SHADER_TESS_EVAL: + if (args->ac.view_index.used) + set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1); + break; + case MESA_SHADER_GEOMETRY: + if (has_previous_stage) { + if (previous_stage == MESA_SHADER_VERTEX) + set_vs_specific_input_locs(args, stage, + has_previous_stage, + previous_stage, + &user_sgpr_idx); + } + if (args->ac.view_index.used) + set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1); + break; + case MESA_SHADER_FRAGMENT: + break; + default: + unreachable("Shader stage not implemented"); + } + + args->shader_info->num_user_sgprs = user_sgpr_idx; +} + diff --git a/src/amd/vulkan/radv_shader_args.h b/src/amd/vulkan/radv_shader_args.h index 5f295b5e331..a7442c617de 100644 --- a/src/amd/vulkan/radv_shader_args.h +++ b/src/amd/vulkan/radv_shader_args.h @@ -24,6 +24,7 @@ #include "ac_shader_args.h" #include "radv_constants.h" #include "util/list.h" +#include "compiler/shader_enums.h" #include "amd_family.h" struct radv_shader_args { @@ -73,4 +74,8 @@ radv_shader_args_from_ac(struct ac_shader_args *args) return (struct radv_shader_args *) container_of(args, radv_args, ac); } +void radv_declare_shader_args(struct radv_shader_args *args, + gl_shader_stage stage, + bool has_previous_stage, + gl_shader_stage previous_stage); -- 2.30.2