- struct si_shader *shader = ctx->shader;
- LLVMTypeRef returns[AC_MAX_ARGS];
- unsigned i, num_return_sgprs;
- unsigned num_returns = 0;
- unsigned num_prolog_vgprs = 0;
- unsigned type = ctx->type;
- unsigned vs_blit_property =
- shader->selector->info.properties[TGSI_PROPERTY_VS_BLIT_SGPRS_AMD];
-
- memset(&ctx->args, 0, sizeof(ctx->args));
-
- /* Set MERGED shaders. */
- if (ctx->screen->info.chip_class >= GFX9) {
- if (shader->key.as_ls || type == PIPE_SHADER_TESS_CTRL)
- type = SI_SHADER_MERGED_VERTEX_TESSCTRL; /* LS or HS */
- else if (shader->key.as_es || shader->key.as_ngg || type == PIPE_SHADER_GEOMETRY)
- type = SI_SHADER_MERGED_VERTEX_OR_TESSEVAL_GEOMETRY;
- }
-
- switch (type) {
- case PIPE_SHADER_VERTEX:
- declare_global_desc_pointers(ctx);
-
- if (vs_blit_property) {
- declare_vs_blit_inputs(ctx, vs_blit_property);
-
- /* VGPRs */
- declare_vs_input_vgprs(ctx, &num_prolog_vgprs, ngg_cull_shader);
- break;
- }
-
- declare_per_stage_desc_pointers(ctx, true);
- declare_vs_specific_input_sgprs(ctx);
- if (!shader->is_gs_copy_shader)
- declare_vb_descriptor_input_sgprs(ctx);
-
- if (shader->key.as_es) {
- ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT,
- &ctx->es2gs_offset);
- } else if (shader->key.as_ls) {
- /* no extra parameters */
- } else {
- /* The locations of the other parameters are assigned dynamically. */
- declare_streamout_params(ctx, &shader->selector->so);
- }
-
- /* VGPRs */
- declare_vs_input_vgprs(ctx, &num_prolog_vgprs, ngg_cull_shader);
-
- /* Return values */
- if (shader->key.opt.vs_as_prim_discard_cs) {
- for (i = 0; i < 4; i++)
- returns[num_returns++] = ctx->ac.f32; /* VGPRs */
- }
- break;
-
- case PIPE_SHADER_TESS_CTRL: /* GFX6-GFX8 */
- declare_global_desc_pointers(ctx);
- declare_per_stage_desc_pointers(ctx, true);
- ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_offchip_layout);
- ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_out_lds_offsets);
- ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_out_lds_layout);
- ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->vs_state_bits);
- ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_offchip_offset);
- ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_factor_offset);
-
- /* VGPRs */
- ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.tcs_patch_id);
- ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.tcs_rel_ids);
-
- /* param_tcs_offchip_offset and param_tcs_factor_offset are
- * placed after the user SGPRs.
- */
- for (i = 0; i < GFX6_TCS_NUM_USER_SGPR + 2; i++)
- returns[num_returns++] = ctx->ac.i32; /* SGPRs */
- for (i = 0; i < 11; i++)
- returns[num_returns++] = ctx->ac.f32; /* VGPRs */
- break;
-
- case SI_SHADER_MERGED_VERTEX_TESSCTRL:
- /* Merged stages have 8 system SGPRs at the beginning. */
- /* SPI_SHADER_USER_DATA_ADDR_LO/HI_HS */
- declare_per_stage_desc_pointers(ctx,
- ctx->type == PIPE_SHADER_TESS_CTRL);
- ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_offchip_offset);
- ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->merged_wave_info);
- ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_factor_offset);
- ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->merged_scratch_offset);
- ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); /* unused */
- ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); /* unused */
-
- declare_global_desc_pointers(ctx);
- declare_per_stage_desc_pointers(ctx,
- ctx->type == PIPE_SHADER_VERTEX);
- declare_vs_specific_input_sgprs(ctx);
-
- ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_offchip_layout);
- ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_out_lds_offsets);
- ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_out_lds_layout);
- declare_vb_descriptor_input_sgprs(ctx);
-
- /* VGPRs (first TCS, then VS) */
- ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.tcs_patch_id);
- ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.tcs_rel_ids);
-
- if (ctx->type == PIPE_SHADER_VERTEX) {
- declare_vs_input_vgprs(ctx, &num_prolog_vgprs, ngg_cull_shader);
-
- /* LS return values are inputs to the TCS main shader part. */
- for (i = 0; i < 8 + GFX9_TCS_NUM_USER_SGPR; i++)
- returns[num_returns++] = ctx->ac.i32; /* SGPRs */
- for (i = 0; i < 2; i++)
- returns[num_returns++] = ctx->ac.f32; /* VGPRs */
- } else {
- /* TCS return values are inputs to the TCS epilog.
- *
- * param_tcs_offchip_offset, param_tcs_factor_offset,
- * param_tcs_offchip_layout, and param_rw_buffers
- * should be passed to the epilog.
- */
- for (i = 0; i <= 8 + GFX9_SGPR_TCS_OUT_LAYOUT; i++)
- returns[num_returns++] = ctx->ac.i32; /* SGPRs */
- for (i = 0; i < 11; i++)
- returns[num_returns++] = ctx->ac.f32; /* VGPRs */
- }
- break;
-
- case SI_SHADER_MERGED_VERTEX_OR_TESSEVAL_GEOMETRY:
- /* Merged stages have 8 system SGPRs at the beginning. */
- /* SPI_SHADER_USER_DATA_ADDR_LO/HI_GS */
- declare_per_stage_desc_pointers(ctx,
- ctx->type == PIPE_SHADER_GEOMETRY);
-
- if (ctx->shader->key.as_ngg)
- ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->gs_tg_info);
- else
- ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->gs2vs_offset);
-
- ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->merged_wave_info);
- ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_offchip_offset);
- ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->merged_scratch_offset);
- ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_CONST_DESC_PTR,
- &ctx->small_prim_cull_info); /* SPI_SHADER_PGM_LO_GS << 8 */
- ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); /* unused (SPI_SHADER_PGM_LO/HI_GS >> 24) */
-
- declare_global_desc_pointers(ctx);
- if (ctx->type != PIPE_SHADER_VERTEX || !vs_blit_property) {
- declare_per_stage_desc_pointers(ctx,
- (ctx->type == PIPE_SHADER_VERTEX ||
- ctx->type == PIPE_SHADER_TESS_EVAL));
- }
-
- if (ctx->type == PIPE_SHADER_VERTEX) {
- if (vs_blit_property)
- declare_vs_blit_inputs(ctx, vs_blit_property);
- else
- declare_vs_specific_input_sgprs(ctx);
- } else {
- ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->vs_state_bits);
- ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_offchip_layout);
- ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tes_offchip_addr);
- /* Declare as many input SGPRs as the VS has. */
- }
-
- if (ctx->type == PIPE_SHADER_VERTEX)
- declare_vb_descriptor_input_sgprs(ctx);
-
- /* VGPRs (first GS, then VS/TES) */
- ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->gs_vtx01_offset);
- ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->gs_vtx23_offset);
- ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.gs_prim_id);
- ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.gs_invocation_id);
- ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->gs_vtx45_offset);
-
- if (ctx->type == PIPE_SHADER_VERTEX) {
- declare_vs_input_vgprs(ctx, &num_prolog_vgprs, ngg_cull_shader);
- } else if (ctx->type == PIPE_SHADER_TESS_EVAL) {
- declare_tes_input_vgprs(ctx, ngg_cull_shader);
- }
-
- if ((ctx->shader->key.as_es || ngg_cull_shader) &&
- (ctx->type == PIPE_SHADER_VERTEX ||
- ctx->type == PIPE_SHADER_TESS_EVAL)) {
- unsigned num_user_sgprs, num_vgprs;
-
- if (ctx->type == PIPE_SHADER_VERTEX) {
- /* For the NGG cull shader, add 1 SGPR to hold
- * the vertex buffer pointer.
- */
- num_user_sgprs = GFX9_VSGS_NUM_USER_SGPR + ngg_cull_shader;
-
- if (ngg_cull_shader && shader->selector->num_vbos_in_user_sgprs) {
- assert(num_user_sgprs <= 8 + SI_SGPR_VS_VB_DESCRIPTOR_FIRST);
- num_user_sgprs = SI_SGPR_VS_VB_DESCRIPTOR_FIRST +
- shader->selector->num_vbos_in_user_sgprs * 4;
- }
- } else {
- num_user_sgprs = GFX9_TESGS_NUM_USER_SGPR;
- }
-
- /* The NGG cull shader has to return all 9 VGPRs + the old thread ID.
- *
- * The normal merged ESGS shader only has to return the 5 VGPRs
- * for the GS stage.
- */
- num_vgprs = ngg_cull_shader ? 10 : 5;
-
- /* ES return values are inputs to GS. */
- for (i = 0; i < 8 + num_user_sgprs; i++)
- returns[num_returns++] = ctx->ac.i32; /* SGPRs */
- for (i = 0; i < num_vgprs; i++)
- returns[num_returns++] = ctx->ac.f32; /* VGPRs */
- }
- break;
-
- case PIPE_SHADER_TESS_EVAL:
- declare_global_desc_pointers(ctx);
- declare_per_stage_desc_pointers(ctx, true);
- ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->vs_state_bits);
- ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_offchip_layout);
- ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tes_offchip_addr);
-
- if (shader->key.as_es) {
- ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_offchip_offset);
- ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
- ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->es2gs_offset);
- } else {
- declare_streamout_params(ctx, &shader->selector->so);
- ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_offchip_offset);
- }
-
- /* VGPRs */
- declare_tes_input_vgprs(ctx, ngg_cull_shader);
- break;
-
- case PIPE_SHADER_GEOMETRY:
- declare_global_desc_pointers(ctx);
- declare_per_stage_desc_pointers(ctx, true);
- ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->gs2vs_offset);
- ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->gs_wave_id);
-
- /* VGPRs */
- ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->gs_vtx_offset[0]);
- ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->gs_vtx_offset[1]);
- ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.gs_prim_id);
- ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->gs_vtx_offset[2]);
- ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->gs_vtx_offset[3]);
- ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->gs_vtx_offset[4]);
- ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->gs_vtx_offset[5]);
- ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.gs_invocation_id);
- break;
-
- case PIPE_SHADER_FRAGMENT:
- declare_global_desc_pointers(ctx);
- declare_per_stage_desc_pointers(ctx, true);
- si_add_arg_checked(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL,
- SI_PARAM_ALPHA_REF);
- si_add_arg_checked(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT,
- &ctx->args.prim_mask, SI_PARAM_PRIM_MASK);
-
- si_add_arg_checked(&ctx->args, AC_ARG_VGPR, 2, AC_ARG_INT, &ctx->args.persp_sample,
- SI_PARAM_PERSP_SAMPLE);
- si_add_arg_checked(&ctx->args, AC_ARG_VGPR, 2, AC_ARG_INT,
- &ctx->args.persp_center, SI_PARAM_PERSP_CENTER);
- si_add_arg_checked(&ctx->args, AC_ARG_VGPR, 2, AC_ARG_INT,
- &ctx->args.persp_centroid, SI_PARAM_PERSP_CENTROID);
- si_add_arg_checked(&ctx->args, AC_ARG_VGPR, 3, AC_ARG_INT,
- NULL, SI_PARAM_PERSP_PULL_MODEL);
- si_add_arg_checked(&ctx->args, AC_ARG_VGPR, 2, AC_ARG_INT,
- &ctx->args.linear_sample, SI_PARAM_LINEAR_SAMPLE);
- si_add_arg_checked(&ctx->args, AC_ARG_VGPR, 2, AC_ARG_INT,
- &ctx->args.linear_center, SI_PARAM_LINEAR_CENTER);
- si_add_arg_checked(&ctx->args, AC_ARG_VGPR, 2, AC_ARG_INT,
- &ctx->args.linear_centroid, SI_PARAM_LINEAR_CENTROID);
- si_add_arg_checked(&ctx->args, AC_ARG_VGPR, 3, AC_ARG_FLOAT,
- NULL, SI_PARAM_LINE_STIPPLE_TEX);
- si_add_arg_checked(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_FLOAT,
- &ctx->args.frag_pos[0], SI_PARAM_POS_X_FLOAT);
- si_add_arg_checked(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_FLOAT,
- &ctx->args.frag_pos[1], SI_PARAM_POS_Y_FLOAT);
- si_add_arg_checked(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_FLOAT,
- &ctx->args.frag_pos[2], SI_PARAM_POS_Z_FLOAT);
- si_add_arg_checked(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_FLOAT,
- &ctx->args.frag_pos[3], SI_PARAM_POS_W_FLOAT);
- shader->info.face_vgpr_index = ctx->args.num_vgprs_used;
- si_add_arg_checked(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT,
- &ctx->args.front_face, SI_PARAM_FRONT_FACE);
- shader->info.ancillary_vgpr_index = ctx->args.num_vgprs_used;
- si_add_arg_checked(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT,
- &ctx->args.ancillary, SI_PARAM_ANCILLARY);
- si_add_arg_checked(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_FLOAT,
- &ctx->args.sample_coverage, SI_PARAM_SAMPLE_COVERAGE);
- si_add_arg_checked(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT,
- &ctx->pos_fixed_pt, SI_PARAM_POS_FIXED_PT);
-
- /* Color inputs from the prolog. */
- if (shader->selector->info.colors_read) {
- unsigned num_color_elements =
- util_bitcount(shader->selector->info.colors_read);
-
- for (i = 0; i < num_color_elements; i++)
- ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_FLOAT, NULL);
-
- num_prolog_vgprs += num_color_elements;
- }
-
- /* Outputs for the epilog. */
- num_return_sgprs = SI_SGPR_ALPHA_REF + 1;
- num_returns =
- num_return_sgprs +
- util_bitcount(shader->selector->info.colors_written) * 4 +
- shader->selector->info.writes_z +
- shader->selector->info.writes_stencil +
- shader->selector->info.writes_samplemask +
- 1 /* SampleMaskIn */;
-
- num_returns = MAX2(num_returns,
- num_return_sgprs +
- PS_EPILOG_SAMPLEMASK_MIN_LOC + 1);
-
- for (i = 0; i < num_return_sgprs; i++)
- returns[i] = ctx->ac.i32;
- for (; i < num_returns; i++)
- returns[i] = ctx->ac.f32;
- break;
-
- case PIPE_SHADER_COMPUTE:
- declare_global_desc_pointers(ctx);
- declare_per_stage_desc_pointers(ctx, true);
- if (shader->selector->info.uses_grid_size)
- ac_add_arg(&ctx->args, AC_ARG_SGPR, 3, AC_ARG_INT,
- &ctx->args.num_work_groups);
- if (shader->selector->info.uses_block_size &&
- shader->selector->info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] == 0)
- ac_add_arg(&ctx->args, AC_ARG_SGPR, 3, AC_ARG_INT, &ctx->block_size);
-
- unsigned cs_user_data_dwords =
- shader->selector->info.properties[TGSI_PROPERTY_CS_USER_DATA_COMPONENTS_AMD];
- if (cs_user_data_dwords) {
- ac_add_arg(&ctx->args, AC_ARG_SGPR, cs_user_data_dwords, AC_ARG_INT,
- &ctx->cs_user_data);
- }
-
- /* Hardware SGPRs. */
- for (i = 0; i < 3; i++) {
- if (shader->selector->info.uses_block_id[i]) {
- ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT,
- &ctx->args.workgroup_ids[i]);
- }
- }
- if (shader->selector->info.uses_subgroup_info)
- ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.tg_size);
-
- /* Hardware VGPRs. */
- ac_add_arg(&ctx->args, AC_ARG_VGPR, 3, AC_ARG_INT,
- &ctx->args.local_invocation_ids);
- break;
- default:
- assert(0 && "unimplemented shader");
- return;
- }
-
- si_llvm_create_func(ctx, ngg_cull_shader ? "ngg_cull_main" : "main",
- returns, num_returns, si_get_max_workgroup_size(shader));
-
- /* Reserve register locations for VGPR inputs the PS prolog may need. */
- if (ctx->type == PIPE_SHADER_FRAGMENT && !ctx->shader->is_monolithic) {
- ac_llvm_add_target_dep_function_attr(ctx->main_fn,
- "InitialPSInputAddr",
- S_0286D0_PERSP_SAMPLE_ENA(1) |
- S_0286D0_PERSP_CENTER_ENA(1) |
- S_0286D0_PERSP_CENTROID_ENA(1) |
- S_0286D0_LINEAR_SAMPLE_ENA(1) |
- S_0286D0_LINEAR_CENTER_ENA(1) |
- S_0286D0_LINEAR_CENTROID_ENA(1) |
- S_0286D0_FRONT_FACE_ENA(1) |
- S_0286D0_ANCILLARY_ENA(1) |
- S_0286D0_POS_FIXED_PT_ENA(1));
- }
-
- shader->info.num_input_sgprs = ctx->args.num_sgprs_used;
- shader->info.num_input_vgprs = ctx->args.num_vgprs_used;
-
- assert(shader->info.num_input_vgprs >= num_prolog_vgprs);
- shader->info.num_input_vgprs -= num_prolog_vgprs;
-
- if (shader->key.as_ls || ctx->type == PIPE_SHADER_TESS_CTRL) {
- if (USE_LDS_SYMBOLS && LLVM_VERSION_MAJOR >= 9) {
- /* The LSHS size is not known until draw time, so we append it
- * at the end of whatever LDS use there may be in the rest of
- * the shader (currently none, unless LLVM decides to do its
- * own LDS-based lowering).
- */
- ctx->ac.lds = LLVMAddGlobalInAddressSpace(
- ctx->ac.module, LLVMArrayType(ctx->ac.i32, 0),
- "__lds_end", AC_ADDR_SPACE_LDS);
- LLVMSetAlignment(ctx->ac.lds, 256);
- } else {
- ac_declare_lds_as_pointer(&ctx->ac);
- }
- }
-
- /* Unlike radv, we override these arguments in the prolog, so to the
- * API shader they appear as normal arguments.
- */
- if (ctx->type == PIPE_SHADER_VERTEX) {
- ctx->abi.vertex_id = ac_get_arg(&ctx->ac, ctx->args.vertex_id);
- ctx->abi.instance_id = ac_get_arg(&ctx->ac, ctx->args.instance_id);
- } else if (ctx->type == PIPE_SHADER_FRAGMENT) {
- ctx->abi.persp_centroid = ac_get_arg(&ctx->ac, ctx->args.persp_centroid);
- ctx->abi.linear_centroid = ac_get_arg(&ctx->ac, ctx->args.linear_centroid);
- }
+ struct si_shader *shader = ctx->shader;
+ LLVMTypeRef returns[AC_MAX_ARGS];
+ unsigned i, num_return_sgprs;
+ unsigned num_returns = 0;
+ unsigned num_prolog_vgprs = 0;
+ unsigned stage = ctx->stage;
+ unsigned vs_blit_property = shader->selector->info.properties[TGSI_PROPERTY_VS_BLIT_SGPRS_AMD];
+
+ memset(&ctx->args, 0, sizeof(ctx->args));
+
+ /* Set MERGED shaders. */
+ if (ctx->screen->info.chip_class >= GFX9) {
+ if (shader->key.as_ls || stage == MESA_SHADER_TESS_CTRL)
+ stage = SI_SHADER_MERGED_VERTEX_TESSCTRL; /* LS or HS */
+ else if (shader->key.as_es || shader->key.as_ngg || stage == MESA_SHADER_GEOMETRY)
+ stage = SI_SHADER_MERGED_VERTEX_OR_TESSEVAL_GEOMETRY;
+ }
+
+ switch (stage) {
+ case MESA_SHADER_VERTEX:
+ declare_global_desc_pointers(ctx);
+
+ if (vs_blit_property) {
+ declare_vs_blit_inputs(ctx, vs_blit_property);
+
+ /* VGPRs */
+ declare_vs_input_vgprs(ctx, &num_prolog_vgprs, ngg_cull_shader);
+ break;
+ }
+
+ declare_per_stage_desc_pointers(ctx, true);
+ declare_vs_specific_input_sgprs(ctx);
+ if (!shader->is_gs_copy_shader)
+ declare_vb_descriptor_input_sgprs(ctx);
+
+ if (shader->key.as_es) {
+ ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->es2gs_offset);
+ } else if (shader->key.as_ls) {
+ /* no extra parameters */
+ } else {
+ /* The locations of the other parameters are assigned dynamically. */
+ declare_streamout_params(ctx, &shader->selector->so);
+ }
+
+ /* VGPRs */
+ declare_vs_input_vgprs(ctx, &num_prolog_vgprs, ngg_cull_shader);
+
+ /* Return values */
+ if (shader->key.opt.vs_as_prim_discard_cs) {
+ for (i = 0; i < 4; i++)
+ returns[num_returns++] = ctx->ac.f32; /* VGPRs */
+ }
+ break;
+
+ case MESA_SHADER_TESS_CTRL: /* GFX6-GFX8 */
+ declare_global_desc_pointers(ctx);
+ declare_per_stage_desc_pointers(ctx, true);
+ ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_offchip_layout);
+ ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_out_lds_offsets);
+ ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_out_lds_layout);
+ ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->vs_state_bits);
+ ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_offchip_offset);
+ ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_factor_offset);
+
+ /* VGPRs */
+ ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.tcs_patch_id);
+ ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.tcs_rel_ids);
+
+ /* param_tcs_offchip_offset and param_tcs_factor_offset are
+ * placed after the user SGPRs.
+ */
+ for (i = 0; i < GFX6_TCS_NUM_USER_SGPR + 2; i++)
+ returns[num_returns++] = ctx->ac.i32; /* SGPRs */
+ for (i = 0; i < 11; i++)
+ returns[num_returns++] = ctx->ac.f32; /* VGPRs */
+ break;
+
+ case SI_SHADER_MERGED_VERTEX_TESSCTRL:
+ /* Merged stages have 8 system SGPRs at the beginning. */
+ /* SPI_SHADER_USER_DATA_ADDR_LO/HI_HS */
+ declare_per_stage_desc_pointers(ctx, ctx->stage == MESA_SHADER_TESS_CTRL);
+ ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_offchip_offset);
+ ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->merged_wave_info);
+ ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_factor_offset);
+ ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->merged_scratch_offset);
+ ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); /* unused */
+ ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); /* unused */
+
+ declare_global_desc_pointers(ctx);
+ declare_per_stage_desc_pointers(ctx, ctx->stage == MESA_SHADER_VERTEX);
+ declare_vs_specific_input_sgprs(ctx);
+
+ ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_offchip_layout);
+ ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_out_lds_offsets);
+ ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_out_lds_layout);
+ declare_vb_descriptor_input_sgprs(ctx);
+
+ /* VGPRs (first TCS, then VS) */
+ ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.tcs_patch_id);
+ ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.tcs_rel_ids);
+
+ if (ctx->stage == MESA_SHADER_VERTEX) {
+ declare_vs_input_vgprs(ctx, &num_prolog_vgprs, ngg_cull_shader);
+
+ /* LS return values are inputs to the TCS main shader part. */
+ for (i = 0; i < 8 + GFX9_TCS_NUM_USER_SGPR; i++)
+ returns[num_returns++] = ctx->ac.i32; /* SGPRs */
+ for (i = 0; i < 2; i++)
+ returns[num_returns++] = ctx->ac.f32; /* VGPRs */
+ } else {
+ /* TCS return values are inputs to the TCS epilog.
+ *
+ * param_tcs_offchip_offset, param_tcs_factor_offset,
+ * param_tcs_offchip_layout, and param_rw_buffers
+ * should be passed to the epilog.
+ */
+ for (i = 0; i <= 8 + GFX9_SGPR_TCS_OUT_LAYOUT; i++)
+ returns[num_returns++] = ctx->ac.i32; /* SGPRs */
+ for (i = 0; i < 11; i++)
+ returns[num_returns++] = ctx->ac.f32; /* VGPRs */
+ }
+ break;
+
+ case SI_SHADER_MERGED_VERTEX_OR_TESSEVAL_GEOMETRY:
+ /* Merged stages have 8 system SGPRs at the beginning. */
+ /* SPI_SHADER_USER_DATA_ADDR_LO/HI_GS */
+ declare_per_stage_desc_pointers(ctx, ctx->stage == MESA_SHADER_GEOMETRY);
+
+ if (ctx->shader->key.as_ngg)
+ ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->gs_tg_info);
+ else
+ ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->gs2vs_offset);
+
+ ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->merged_wave_info);
+ ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_offchip_offset);
+ ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->merged_scratch_offset);
+ ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_CONST_DESC_PTR,
+ &ctx->small_prim_cull_info); /* SPI_SHADER_PGM_LO_GS << 8 */
+ ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT,
+ NULL); /* unused (SPI_SHADER_PGM_LO/HI_GS >> 24) */
+
+ declare_global_desc_pointers(ctx);
+ if (ctx->stage != MESA_SHADER_VERTEX || !vs_blit_property) {
+ declare_per_stage_desc_pointers(
+ ctx, (ctx->stage == MESA_SHADER_VERTEX || ctx->stage == MESA_SHADER_TESS_EVAL));
+ }
+
+ if (ctx->stage == MESA_SHADER_VERTEX) {
+ if (vs_blit_property)
+ declare_vs_blit_inputs(ctx, vs_blit_property);
+ else
+ declare_vs_specific_input_sgprs(ctx);
+ } else {
+ ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->vs_state_bits);
+ ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_offchip_layout);
+ ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tes_offchip_addr);
+ /* Declare as many input SGPRs as the VS has. */
+ }
+
+ if (ctx->stage == MESA_SHADER_VERTEX)
+ declare_vb_descriptor_input_sgprs(ctx);
+
+ /* VGPRs (first GS, then VS/TES) */
+ ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->gs_vtx01_offset);
+ ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->gs_vtx23_offset);
+ ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.gs_prim_id);
+ ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.gs_invocation_id);
+ ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->gs_vtx45_offset);
+
+ if (ctx->stage == MESA_SHADER_VERTEX) {
+ declare_vs_input_vgprs(ctx, &num_prolog_vgprs, ngg_cull_shader);
+ } else if (ctx->stage == MESA_SHADER_TESS_EVAL) {
+ declare_tes_input_vgprs(ctx, ngg_cull_shader);
+ }
+
+ if ((ctx->shader->key.as_es || ngg_cull_shader) &&
+ (ctx->stage == MESA_SHADER_VERTEX || ctx->stage == MESA_SHADER_TESS_EVAL)) {
+ unsigned num_user_sgprs, num_vgprs;
+
+ if (ctx->stage == MESA_SHADER_VERTEX) {
+ /* For the NGG cull shader, add 1 SGPR to hold
+ * the vertex buffer pointer.
+ */
+ num_user_sgprs = GFX9_VSGS_NUM_USER_SGPR + ngg_cull_shader;
+
+ if (ngg_cull_shader && shader->selector->num_vbos_in_user_sgprs) {
+ assert(num_user_sgprs <= 8 + SI_SGPR_VS_VB_DESCRIPTOR_FIRST);
+ num_user_sgprs =
+ SI_SGPR_VS_VB_DESCRIPTOR_FIRST + shader->selector->num_vbos_in_user_sgprs * 4;
+ }
+ } else {
+ num_user_sgprs = GFX9_TESGS_NUM_USER_SGPR;
+ }
+
+ /* The NGG cull shader has to return all 9 VGPRs + the old thread ID.
+ *
+ * The normal merged ESGS shader only has to return the 5 VGPRs
+ * for the GS stage.
+ */
+ num_vgprs = ngg_cull_shader ? 10 : 5;
+
+ /* ES return values are inputs to GS. */
+ for (i = 0; i < 8 + num_user_sgprs; i++)
+ returns[num_returns++] = ctx->ac.i32; /* SGPRs */
+ for (i = 0; i < num_vgprs; i++)
+ returns[num_returns++] = ctx->ac.f32; /* VGPRs */
+ }
+ break;
+
+ case MESA_SHADER_TESS_EVAL:
+ declare_global_desc_pointers(ctx);
+ declare_per_stage_desc_pointers(ctx, true);
+ ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->vs_state_bits);
+ ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_offchip_layout);
+ ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tes_offchip_addr);
+
+ if (shader->key.as_es) {
+ ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_offchip_offset);
+ ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
+ ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->es2gs_offset);
+ } else {
+ declare_streamout_params(ctx, &shader->selector->so);
+ ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_offchip_offset);
+ }
+
+ /* VGPRs */
+ declare_tes_input_vgprs(ctx, ngg_cull_shader);
+ break;
+
+ case MESA_SHADER_GEOMETRY:
+ declare_global_desc_pointers(ctx);
+ declare_per_stage_desc_pointers(ctx, true);
+ ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->gs2vs_offset);
+ ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->gs_wave_id);
+
+ /* VGPRs */
+ ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->gs_vtx_offset[0]);
+ ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->gs_vtx_offset[1]);
+ ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.gs_prim_id);
+ ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->gs_vtx_offset[2]);
+ ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->gs_vtx_offset[3]);
+ ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->gs_vtx_offset[4]);
+ ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->gs_vtx_offset[5]);
+ ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.gs_invocation_id);
+ break;
+
+ case MESA_SHADER_FRAGMENT:
+ declare_global_desc_pointers(ctx);
+ declare_per_stage_desc_pointers(ctx, true);
+ si_add_arg_checked(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL, SI_PARAM_ALPHA_REF);
+ si_add_arg_checked(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.prim_mask,
+ SI_PARAM_PRIM_MASK);
+
+ si_add_arg_checked(&ctx->args, AC_ARG_VGPR, 2, AC_ARG_INT, &ctx->args.persp_sample,
+ SI_PARAM_PERSP_SAMPLE);
+ si_add_arg_checked(&ctx->args, AC_ARG_VGPR, 2, AC_ARG_INT, &ctx->args.persp_center,
+ SI_PARAM_PERSP_CENTER);
+ si_add_arg_checked(&ctx->args, AC_ARG_VGPR, 2, AC_ARG_INT, &ctx->args.persp_centroid,
+ SI_PARAM_PERSP_CENTROID);
+ si_add_arg_checked(&ctx->args, AC_ARG_VGPR, 3, AC_ARG_INT, NULL, SI_PARAM_PERSP_PULL_MODEL);
+ si_add_arg_checked(&ctx->args, AC_ARG_VGPR, 2, AC_ARG_INT, &ctx->args.linear_sample,
+ SI_PARAM_LINEAR_SAMPLE);
+ si_add_arg_checked(&ctx->args, AC_ARG_VGPR, 2, AC_ARG_INT, &ctx->args.linear_center,
+ SI_PARAM_LINEAR_CENTER);
+ si_add_arg_checked(&ctx->args, AC_ARG_VGPR, 2, AC_ARG_INT, &ctx->args.linear_centroid,
+ SI_PARAM_LINEAR_CENTROID);
+ si_add_arg_checked(&ctx->args, AC_ARG_VGPR, 3, AC_ARG_FLOAT, NULL, SI_PARAM_LINE_STIPPLE_TEX);
+ si_add_arg_checked(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &ctx->args.frag_pos[0],
+ SI_PARAM_POS_X_FLOAT);
+ si_add_arg_checked(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &ctx->args.frag_pos[1],
+ SI_PARAM_POS_Y_FLOAT);
+ si_add_arg_checked(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &ctx->args.frag_pos[2],
+ SI_PARAM_POS_Z_FLOAT);
+ si_add_arg_checked(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &ctx->args.frag_pos[3],
+ SI_PARAM_POS_W_FLOAT);
+ shader->info.face_vgpr_index = ctx->args.num_vgprs_used;
+ si_add_arg_checked(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.front_face,
+ SI_PARAM_FRONT_FACE);
+ shader->info.ancillary_vgpr_index = ctx->args.num_vgprs_used;
+ si_add_arg_checked(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.ancillary,
+ SI_PARAM_ANCILLARY);
+ si_add_arg_checked(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &ctx->args.sample_coverage,
+ SI_PARAM_SAMPLE_COVERAGE);
+ si_add_arg_checked(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->pos_fixed_pt,
+ SI_PARAM_POS_FIXED_PT);
+
+ /* Color inputs from the prolog. */
+ if (shader->selector->info.colors_read) {
+ unsigned num_color_elements = util_bitcount(shader->selector->info.colors_read);
+
+ for (i = 0; i < num_color_elements; i++)
+ ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_FLOAT, NULL);
+
+ num_prolog_vgprs += num_color_elements;
+ }
+
+ /* Outputs for the epilog. */
+ num_return_sgprs = SI_SGPR_ALPHA_REF + 1;
+ num_returns = num_return_sgprs + util_bitcount(shader->selector->info.colors_written) * 4 +
+ shader->selector->info.writes_z + shader->selector->info.writes_stencil +
+ shader->selector->info.writes_samplemask + 1 /* SampleMaskIn */;
+
+ num_returns = MAX2(num_returns, num_return_sgprs + PS_EPILOG_SAMPLEMASK_MIN_LOC + 1);
+
+ for (i = 0; i < num_return_sgprs; i++)
+ returns[i] = ctx->ac.i32;
+ for (; i < num_returns; i++)
+ returns[i] = ctx->ac.f32;
+ break;
+
+ case MESA_SHADER_COMPUTE:
+ declare_global_desc_pointers(ctx);
+ declare_per_stage_desc_pointers(ctx, true);
+ if (shader->selector->info.uses_grid_size)
+ ac_add_arg(&ctx->args, AC_ARG_SGPR, 3, AC_ARG_INT, &ctx->args.num_work_groups);
+ if (shader->selector->info.uses_block_size &&
+ shader->selector->info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] == 0)
+ ac_add_arg(&ctx->args, AC_ARG_SGPR, 3, AC_ARG_INT, &ctx->block_size);
+
+ unsigned cs_user_data_dwords =
+ shader->selector->info.properties[TGSI_PROPERTY_CS_USER_DATA_COMPONENTS_AMD];
+ if (cs_user_data_dwords) {
+ ac_add_arg(&ctx->args, AC_ARG_SGPR, cs_user_data_dwords, AC_ARG_INT, &ctx->cs_user_data);
+ }
+
+ /* Some descriptors can be in user SGPRs. */
+ /* Shader buffers in user SGPRs. */
+ for (unsigned i = 0; i < shader->selector->cs_num_shaderbufs_in_user_sgprs; i++) {
+ while (ctx->args.num_sgprs_used % 4 != 0)
+ ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
+
+ ac_add_arg(&ctx->args, AC_ARG_SGPR, 4, AC_ARG_INT, &ctx->cs_shaderbuf[i]);
+ }
+ /* Images in user SGPRs. */
+ for (unsigned i = 0; i < shader->selector->cs_num_images_in_user_sgprs; i++) {
+ unsigned num_sgprs = shader->selector->info.image_buffers & (1 << i) ? 4 : 8;
+
+ while (ctx->args.num_sgprs_used % num_sgprs != 0)
+ ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
+
+ ac_add_arg(&ctx->args, AC_ARG_SGPR, num_sgprs, AC_ARG_INT, &ctx->cs_image[i]);
+ }
+
+ /* Hardware SGPRs. */
+ for (i = 0; i < 3; i++) {
+ if (shader->selector->info.uses_block_id[i]) {
+ ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.workgroup_ids[i]);
+ }
+ }
+ if (shader->selector->info.uses_subgroup_info)
+ ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.tg_size);
+
+ /* Hardware VGPRs. */
+ ac_add_arg(&ctx->args, AC_ARG_VGPR, 3, AC_ARG_INT, &ctx->args.local_invocation_ids);
+ break;
+ default:
+ assert(0 && "unimplemented shader");
+ return;
+ }
+
+ si_llvm_create_func(ctx, ngg_cull_shader ? "ngg_cull_main" : "main", returns, num_returns,
+ si_get_max_workgroup_size(shader));
+
+ /* Reserve register locations for VGPR inputs the PS prolog may need. */
+ if (ctx->stage == MESA_SHADER_FRAGMENT && !ctx->shader->is_monolithic) {
+ ac_llvm_add_target_dep_function_attr(
+ ctx->main_fn, "InitialPSInputAddr",
+ S_0286D0_PERSP_SAMPLE_ENA(1) | S_0286D0_PERSP_CENTER_ENA(1) |
+ S_0286D0_PERSP_CENTROID_ENA(1) | S_0286D0_LINEAR_SAMPLE_ENA(1) |
+ S_0286D0_LINEAR_CENTER_ENA(1) | S_0286D0_LINEAR_CENTROID_ENA(1) |
+ S_0286D0_FRONT_FACE_ENA(1) | S_0286D0_ANCILLARY_ENA(1) | S_0286D0_POS_FIXED_PT_ENA(1));
+ }
+
+ shader->info.num_input_sgprs = ctx->args.num_sgprs_used;
+ shader->info.num_input_vgprs = ctx->args.num_vgprs_used;
+
+ assert(shader->info.num_input_vgprs >= num_prolog_vgprs);
+ shader->info.num_input_vgprs -= num_prolog_vgprs;
+
+ if (shader->key.as_ls || ctx->stage == MESA_SHADER_TESS_CTRL) {
+ if (USE_LDS_SYMBOLS && LLVM_VERSION_MAJOR >= 9) {
+ /* The LSHS size is not known until draw time, so we append it
+ * at the end of whatever LDS use there may be in the rest of
+ * the shader (currently none, unless LLVM decides to do its
+ * own LDS-based lowering).
+ */
+ ctx->ac.lds = LLVMAddGlobalInAddressSpace(ctx->ac.module, LLVMArrayType(ctx->ac.i32, 0),
+ "__lds_end", AC_ADDR_SPACE_LDS);
+ LLVMSetAlignment(ctx->ac.lds, 256);
+ } else {
+ ac_declare_lds_as_pointer(&ctx->ac);
+ }
+ }
+
+ /* Unlike radv, we override these arguments in the prolog, so to the
+ * API shader they appear as normal arguments.
+ */
+ if (ctx->stage == MESA_SHADER_VERTEX) {
+ ctx->abi.vertex_id = ac_get_arg(&ctx->ac, ctx->args.vertex_id);
+ ctx->abi.instance_id = ac_get_arg(&ctx->ac, ctx->args.instance_id);
+ } else if (ctx->stage == MESA_SHADER_FRAGMENT) {
+ ctx->abi.persp_centroid = ac_get_arg(&ctx->ac, ctx->args.persp_centroid);
+ ctx->abi.linear_centroid = ac_get_arg(&ctx->ac, ctx->args.linear_centroid);
+ }