program->reads_variable_block_size =
sel.info.uses_block_size &&
sel.info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] == 0;
+ program->num_cs_user_data_dwords =
+ sel.info.properties[TGSI_PROPERTY_CS_USER_DATA_DWORDS];
void *ir_binary = si_get_ir_binary(&sel);
bool scratch_enabled = shader->config.scratch_bytes_per_wave > 0;
unsigned user_sgprs = SI_NUM_RESOURCE_SGPRS +
(sel.info.uses_grid_size ? 3 : 0) +
- (program->reads_variable_block_size ? 3 : 0);
+ (program->reads_variable_block_size ? 3 : 0) +
+ program->num_cs_user_data_dwords;
shader->config.rsrc1 =
S_00B848_VGPRS((shader->config.num_vgprs - 1) / 4) |
return true;
}
-static void si_setup_tgsi_grid(struct si_context *sctx,
+static void si_setup_tgsi_user_data(struct si_context *sctx,
const struct pipe_grid_info *info)
{
struct si_compute *program = sctx->cs_shader_state.program;
unsigned block_size_reg = grid_size_reg +
/* 12 bytes = 3 dwords. */
12 * program->uses_grid_size;
+ unsigned cs_user_data_reg = block_size_reg +
+ 12 * program->reads_variable_block_size;
if (info->indirect) {
if (program->uses_grid_size) {
radeon_emit(cs, info->block[2]);
}
}
+
+ if (program->num_cs_user_data_dwords) {
+ radeon_set_sh_reg_seq(cs, cs_user_data_reg, program->num_cs_user_data_dwords);
+ radeon_emit_array(cs, sctx->cs_user_data, program->num_cs_user_data_dwords);
+ }
}
static void si_emit_dispatch_packets(struct si_context *sctx,
}
if (program->ir_type != PIPE_SHADER_IR_NATIVE)
- si_setup_tgsi_grid(sctx, info);
+ si_setup_tgsi_user_data(sctx, info);
si_emit_dispatch_packets(sctx, info);
unsigned uses_bindless_samplers:1;
unsigned uses_bindless_images:1;
bool reads_variable_block_size;
+ unsigned num_cs_user_data_dwords;
};
void si_destroy_compute(struct si_compute *program);
unsigned border_color_count;
unsigned num_vs_blit_sgprs;
uint32_t vs_blit_sh_data[SI_VS_BLIT_SGPRS_POS_TEXCOORD];
+ uint32_t cs_user_data[4];
/* Vertex and index buffers. */
bool vertex_buffers_dirty;
break;
}
+ case TGSI_SEMANTIC_CS_USER_DATA:
+ value = LLVMGetParam(ctx->main_fn, ctx->param_cs_user_data);
+ break;
+
default:
assert(!"unknown system value");
return;
shader->selector->info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] == 0)
ctx->param_block_size = add_arg(&fninfo, ARG_SGPR, v3i32);
+ unsigned cs_user_data_dwords =
+ shader->selector->info.properties[TGSI_PROPERTY_CS_USER_DATA_DWORDS];
+ if (cs_user_data_dwords) {
+ ctx->param_cs_user_data = add_arg(&fninfo, ARG_SGPR,
+ LLVMVectorType(ctx->i32, cs_user_data_dwords));
+ }
+
for (i = 0; i < 3; i++) {
ctx->abi.workgroup_ids[i] = NULL;
if (shader->selector->info.uses_block_id[i])
/* SI-specific system values, numbered starting at TGSI_SEMANTIC_COUNT. */
enum {
+ /* Values from set_tess_state. */
TGSI_SEMANTIC_DEFAULT_TESSOUTER_SI = TGSI_SEMANTIC_COUNT,
TGSI_SEMANTIC_DEFAULT_TESSINNER_SI,
+
+ /* Compute shader user data: up to 4 dwords passed in user SGPRs; the dword count is given by TGSI_PROPERTY_CS_USER_DATA_DWORDS. */
+ TGSI_SEMANTIC_CS_USER_DATA,
};
enum {
/* Use a property enum that CS wouldn't use. */
TGSI_PROPERTY_CS_LOCAL_SIZE = TGSI_PROPERTY_FS_COORD_ORIGIN,
+ /* The number of user data dwords in use, in the range [1, 4]; 0 means no user data is passed. */
+ TGSI_PROPERTY_CS_USER_DATA_DWORDS = TGSI_PROPERTY_FS_COORD_PIXEL_CENTER,
+
/* Use a property enum that VS wouldn't use. */
TGSI_PROPERTY_VS_BLIT_SGPRS = TGSI_PROPERTY_FS_COORD_ORIGIN,
int param_gs_vtx45_offset; /* in dwords (GFX9) */
/* CS */
int param_block_size;
+ int param_cs_user_data;
struct ac_llvm_compiler *compiler;