bool exec_potentially_empty = false;
} cf_info;
- /* scratch */
- bool scratch_enabled = false;
-
/* inputs common for merged stages */
Temp merged_wave_info = Temp(0, s1);
user_sgpr_info.need_ring_offsets = true;
/* 2 user sgprs will nearly always be allocated for scratch/rings */
- if (ctx->options->supports_spill || user_sgpr_info.need_ring_offsets || ctx->scratch_enabled)
- user_sgpr_count += 2;
+ user_sgpr_count += 2;
switch (ctx->stage) {
case vertex_vs:
arg_info args = {};
/* this needs to be in sgprs 0 and 1 */
- if (ctx->options->supports_spill || user_sgpr_info.need_ring_offsets || ctx->scratch_enabled) {
- add_arg(&args, s2, &ctx->program->private_segment_buffer, 0);
- set_loc_shader_ptr(ctx, AC_UD_SCRATCH_RING_OFFSETS, &user_sgpr_info.user_sgpr_idx);
- }
+ add_arg(&args, s2, &ctx->program->private_segment_buffer, 0);
+ set_loc_shader_ptr(ctx, AC_UD_SCRATCH_RING_OFFSETS, &user_sgpr_info.user_sgpr_idx);
unsigned vgpr_idx = 0;
switch (ctx->stage) {
else
declare_streamout_sgprs(ctx, &args, &idx);
- if (ctx->options->supports_spill || ctx->scratch_enabled)
- add_arg(&args, s1, &ctx->program->scratch_offset, idx++);
+ add_arg(&args, s1, &ctx->program->scratch_offset, idx++);
declare_vs_input_vgprs(ctx, &args);
break;
assert(user_sgpr_info.user_sgpr_idx == user_sgpr_info.num_sgpr);
add_arg(&args, s1, &ctx->prim_mask, user_sgpr_info.user_sgpr_idx);
- if (ctx->options->supports_spill || ctx->scratch_enabled)
- add_arg(&args, s1, &ctx->program->scratch_offset, user_sgpr_info.user_sgpr_idx + 1);
+ add_arg(&args, s1, &ctx->program->scratch_offset, user_sgpr_info.user_sgpr_idx + 1);
ctx->program->config->spi_ps_input_addr = 0;
ctx->program->config->spi_ps_input_ena = 0;
if (ctx->program->info->cs.uses_local_invocation_idx)
add_arg(&args, s1, &ctx->tg_size, idx++);
- if (ctx->options->supports_spill || ctx->scratch_enabled)
- add_arg(&args, s1, &ctx->program->scratch_offset, idx++);
+ add_arg(&args, s1, &ctx->program->scratch_offset, idx++);
add_arg(&args, v3, &ctx->local_invocation_ids, vgpr_idx++);
break;
unsigned scratch_size = 0;
for (unsigned i = 0; i < shader_count; i++)
scratch_size = std::max(scratch_size, shaders[i]->scratch_size);
- ctx.scratch_enabled = scratch_size > 0;
ctx.program->config->scratch_bytes_per_wave = align(scratch_size * ctx.program->wave_size, 1024);
ctx.block = ctx.program->create_and_insert_block();
}
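/* A hedged sketch, not part of this patch: a self-contained version of the per-wave
 * scratch sizing computed just above. The function name is hypothetical and align()
 * is expanded into plain bit math; it only illustrates the max-over-stages * wave_size
 * value rounded up to a 1024-byte multiple. */
static unsigned
scratch_bytes_per_wave_sketch(const unsigned *scratch_sizes, unsigned shader_count,
                              unsigned wave_size)
{
   unsigned scratch_size = 0;
   for (unsigned i = 0; i < shader_count; i++) {
      if (scratch_sizes[i] > scratch_size)
         scratch_size = scratch_sizes[i];
   }
   /* Round up to the 1024-byte granularity, e.g. 100 B/lane * 64 lanes = 6400 -> 7168. */
   return (scratch_size * wave_size + 1023u) & ~1023u;
}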
struct user_sgpr_info {
- bool need_ring_offsets;
bool indirect_all_descriptor_sets;
uint8_t remaining_sgprs;
};
memset(user_sgpr_info, 0, sizeof(struct user_sgpr_info));
- /* until we sort out scratch/global buffers always assign ring offsets for gs/vs/es */
- if (stage == MESA_SHADER_GEOMETRY ||
- stage == MESA_SHADER_VERTEX ||
- stage == MESA_SHADER_TESS_CTRL ||
- stage == MESA_SHADER_TESS_EVAL ||
- args->is_gs_copy_shader)
- user_sgpr_info->need_ring_offsets = true;
-
- if (stage == MESA_SHADER_FRAGMENT &&
- args->shader_info->ps.needs_sample_positions)
- user_sgpr_info->need_ring_offsets = true;
-
- /* 2 user sgprs will nearly always be allocated for scratch/rings */
- if (args->options->supports_spill || user_sgpr_info->need_ring_offsets) {
- user_sgpr_count += 2;
- }
+ /* 2 user sgprs will always be allocated for scratch/rings */
+ user_sgpr_count += 2;
switch (stage) {
case MESA_SHADER_COMPUTE:
allocate_user_sgprs(args, stage, has_previous_stage,
previous_stage, needs_view_index, &user_sgpr_info);
- if (user_sgpr_info.need_ring_offsets && !args->options->supports_spill) {
+ if (args->options->explicit_scratch_args) {
ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_DESC_PTR,
&args->ring_offsets);
}
&args->ac.tg_size);
}
+ if (args->options->explicit_scratch_args) {
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
+ &args->scratch_offset);
+ }
+
ac_add_arg(&args->ac, AC_ARG_VGPR, 3, AC_ARG_INT,
&args->ac.local_invocation_ids);
break;
declare_streamout_sgprs(args, stage);
}
+ if (args->options->explicit_scratch_args) {
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
+ &args->scratch_offset);
+ }
+
declare_vs_input_vgprs(args);
break;
case MESA_SHADER_TESS_CTRL:
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
&args->tess_factor_offset);
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // scratch offset
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->scratch_offset);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->oc_lds);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
&args->tess_factor_offset);
+ if (args->options->explicit_scratch_args) {
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
+ &args->scratch_offset);
+ }
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
&args->ac.tcs_patch_id);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
declare_streamout_sgprs(args, stage);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->oc_lds);
}
+ if (args->options->explicit_scratch_args) {
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
+ &args->scratch_offset);
+ }
declare_tes_input_vgprs(args);
break;
case MESA_SHADER_GEOMETRY:
&args->merged_wave_info);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->oc_lds);
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // scratch offset
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->scratch_offset);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->gs2vs_offset);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->gs_wave_id);
+ if (args->options->explicit_scratch_args) {
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
+ &args->scratch_offset);
+ }
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
&args->gs_vtx_offset[0]);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
declare_global_input_sgprs(args, &user_sgpr_info);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.prim_mask);
+ if (args->options->explicit_scratch_args) {
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
+ &args->scratch_offset);
+ }
ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_sample);
ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_center);
ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_centroid);
}
args->shader_info->num_input_vgprs = 0;
- args->shader_info->num_input_sgprs = args->options->supports_spill ? 2 : 0;
+ args->shader_info->num_input_sgprs = 2;
args->shader_info->num_input_sgprs += args->ac.num_sgprs_used;
if (stage != MESA_SHADER_FRAGMENT)
uint8_t user_sgpr_idx = 0;
- if (args->options->supports_spill || user_sgpr_info.need_ring_offsets) {
- set_loc_shader_ptr(args, AC_UD_SCRATCH_RING_OFFSETS,
- &user_sgpr_idx);
- }
+ set_loc_shader_ptr(args, AC_UD_SCRATCH_RING_OFFSETS,
+ &user_sgpr_idx);
/* For merged shaders the user SGPRs start at 8, with 8 system SGPRs in front (including
 * the rw_buffers at s0/s1). With user SGPR0 = s8, let's restart the count from 0 */