#include "si_pipe.h"
#include "sid.h"
+#include "compiler/nir/nir.h"
static const char *scratch_rsrc_dword0_symbol =
"SCRATCH_RSRC_DWORD0";
ubyte vertex_stream[4];
};
+/**
+ * Used to collect types and other info about arguments of the LLVM function
+ * before the function is created.
+ */
+struct si_function_info {
+ LLVMTypeRef types[100]; /* LLVM type of each argument, in declaration order */
+ LLVMValueRef *assign[100]; /* optional destination that receives the argument's
+                             * LLVMValueRef once the function exists; may be NULL */
+ unsigned num_sgpr_params; /* number of leading SGPR args; SGPRs must precede all VGPRs */
+ unsigned num_params; /* total number of declared arguments (SGPRs + VGPRs) */
+};
+
+/* Register file an argument is passed in: scalar (SGPR) or vector (VGPR)
+ * registers. */
+enum si_arg_regfile {
+ ARG_SGPR,
+ ARG_VGPR
+};
+
static void si_init_shader_ctx(struct si_shader_context *ctx,
struct si_screen *sscreen,
LLVMTargetMachineRef tm);
shader->selector->type == PIPE_SHADER_GEOMETRY;
}
+/* Start building a new function signature from scratch. */
+static void si_init_function_info(struct si_function_info *fninfo)
+{
+ /* Only the counters need resetting; types[]/assign[] entries are never
+  * read beyond num_params. */
+ fninfo->num_sgpr_params = 0;
+ fninfo->num_params = 0;
+}
+
+/**
+ * Append one argument to the function signature being built.
+ *
+ * \param regfile  ARG_SGPR or ARG_VGPR; all SGPR arguments must be added
+ *                 before the first VGPR argument (asserted below).
+ * \param type     LLVM type of the argument.
+ * \param assign   if non-NULL, *assign is later set to the argument's
+ *                 LLVMValueRef when the function is created.
+ * \return the index of the newly added argument.
+ */
+static unsigned add_arg_assign(struct si_function_info *fninfo,
+ enum si_arg_regfile regfile, LLVMTypeRef type,
+ LLVMValueRef *assign)
+{
+ /* SGPR args must form a contiguous prefix of the argument list. */
+ assert(regfile != ARG_SGPR || fninfo->num_sgpr_params == fninfo->num_params);
+
+ unsigned idx = fninfo->num_params++;
+ assert(idx < ARRAY_SIZE(fninfo->types));
+
+ if (regfile == ARG_SGPR)
+ fninfo->num_sgpr_params = fninfo->num_params;
+
+ fninfo->types[idx] = type;
+ fninfo->assign[idx] = assign;
+ return idx;
+}
+
+/* Append an argument without requesting an assignment destination. */
+static unsigned add_arg(struct si_function_info *fninfo,
+ enum si_arg_regfile regfile, LLVMTypeRef type)
+{
+ return add_arg_assign(fninfo, regfile, type, NULL);
+}
+
+/* Append an argument that must land at a fixed, pre-assigned index
+ * (used where the ABI dictates the parameter position). */
+static void add_arg_checked(struct si_function_info *fninfo,
+ enum si_arg_regfile regfile, LLVMTypeRef type,
+ unsigned idx)
+{
+ MAYBE_UNUSED unsigned pos = add_arg(fninfo, regfile, type);
+ assert(pos == idx);
+}
+
/**
* Returns a unique index for a per-patch semantic name and index. The index
* must be less than 32, so that a 32-bit bitmask of used inputs or outputs
}
}
+/**
+ * Helper function that builds an LLVM IR PHI node and immediately adds
+ * incoming edges.
+ *
+ * \param count_incoming  number of entries in both values[] and blocks[];
+ *                        values[i] is the value flowing in from blocks[i].
+ */
+static LLVMValueRef
+build_phi(struct ac_llvm_context *ctx, LLVMTypeRef type,
+   unsigned count_incoming, LLVMValueRef *values,
+   LLVMBasicBlockRef *blocks)
+{
+ LLVMValueRef phi = LLVMBuildPhi(ctx->builder, type, "");
+ LLVMAddIncoming(phi, values, blocks, count_incoming);
+ return phi;
+}
+
/**
* Get the value of a shader input parameter and extract a bitfield.
*/
{
struct gallivm_state *gallivm = &ctx->gallivm;
- LLVMValueRef result = LLVMGetParam(ctx->main_fn,
- ctx->param_instance_id);
+ LLVMValueRef result = ctx->abi.instance_id;
/* The division must be done before START_INSTANCE is added. */
if (divisor != ctx->i32_1)
return LLVMBuildFPTrunc(builder, value, ctx->f32, "");
}
-static void declare_input_vs(
+void si_llvm_load_input_vs(
struct si_shader_context *ctx,
unsigned input_index,
- const struct tgsi_full_declaration *decl,
LLVMValueRef out[4])
{
struct gallivm_state *gallivm = &ctx->gallivm;
}
}
-static LLVMValueRef get_primitive_id(struct lp_build_tgsi_context *bld_base,
- unsigned swizzle)
+/* TGSI input-declaration hook for the VS: forwards to the shared loader
+ * si_llvm_load_input_vs. The TGSI declaration itself (decl) is unused. */
+static void declare_input_vs(
+ struct si_shader_context *ctx,
+ unsigned input_index,
+ const struct tgsi_full_declaration *decl,
+ LLVMValueRef out[4])
{
- struct si_shader_context *ctx = si_shader_context(bld_base);
+ si_llvm_load_input_vs(ctx, input_index, out);
+}
+static LLVMValueRef get_primitive_id(struct si_shader_context *ctx,
+ unsigned swizzle)
+{
if (swizzle > 0)
return ctx->i32_0;
* Return the value of tgsi_ind_register for indexing.
* This is the indirect index with the constant offset added to it.
*/
-static LLVMValueRef get_indirect_index(struct si_shader_context *ctx,
- const struct tgsi_ind_register *ind,
- int rel_index)
+LLVMValueRef si_get_indirect_index(struct si_shader_context *ctx,
+ const struct tgsi_ind_register *ind,
+ int rel_index)
{
struct gallivm_state *gallivm = &ctx->gallivm;
LLVMValueRef result;
}
/**
- * Like get_indirect_index, but restricts the return value to a (possibly
+ * Like si_get_indirect_index, but restricts the return value to a (possibly
* undefined) value inside [0..num).
*/
LLVMValueRef si_get_bounded_indirect_index(struct si_shader_context *ctx,
const struct tgsi_ind_register *ind,
int rel_index, unsigned num)
{
- LLVMValueRef result = get_indirect_index(ctx, ind, rel_index);
+ LLVMValueRef result = si_get_indirect_index(ctx, ind, rel_index);
return si_llvm_bound_index(ctx, result, num);
}
LLVMValueRef index;
if (reg.Dimension.Indirect)
- index = get_indirect_index(ctx, ®.DimIndirect,
+ index = si_get_indirect_index(ctx, ®.DimIndirect,
reg.Dimension.Index);
else
index = LLVMConstInt(ctx->i32, reg.Dimension.Index, 0);
else
first = reg.Register.Index;
- ind_index = get_indirect_index(ctx, ®.Indirect,
+ ind_index = si_get_indirect_index(ctx, ®.Indirect,
reg.Register.Index - first);
base_addr = LLVMBuildAdd(gallivm->builder, base_addr,
if (reg.Register.Dimension) {
if (reg.Dimension.Indirect)
- vertex_index = get_indirect_index(ctx, ®.DimIndirect,
+ vertex_index = si_get_indirect_index(ctx, ®.DimIndirect,
reg.Dimension.Index);
else
vertex_index = LLVMConstInt(ctx->i32, reg.Dimension.Index, 0);
else
param_base = reg.Register.Index;
- param_index = get_indirect_index(ctx, ®.Indirect,
+ param_index = si_get_indirect_index(ctx, ®.Indirect,
reg.Register.Index - param_base);
} else {
LLVMValueRef value;
if (swizzle != ~0 && semantic_name == TGSI_SEMANTIC_PRIMID)
- return get_primitive_id(bld_base, swizzle);
+ return get_primitive_id(ctx, swizzle);
if (!reg->Register.Dimension)
return NULL;
}
}
-static void declare_input_fs(
+/**
+ * Load a fragment shader input and write its components to out[].
+ *
+ * All semantic and interpolation information is looked up by input_index
+ * in the shader's tgsi_shader_info, so no TGSI declaration is required.
+ */
+void si_llvm_load_input_fs(
 struct si_shader_context *ctx,
 unsigned input_index,
- const struct tgsi_full_declaration *decl,
 LLVMValueRef out[4])
{
 struct lp_build_context *base = &ctx->bld_base.base;
 struct si_shader *shader = ctx->shader;
+ struct tgsi_shader_info *info = &shader->selector->info;
 LLVMValueRef main_fn = ctx->main_fn;
 LLVMValueRef interp_param = NULL;
 int interp_param_idx;
+ /* Everything the old TGSI declaration provided is taken from shader info. */
+ enum tgsi_semantic semantic_name = info->input_semantic_name[input_index];
+ unsigned semantic_index = info->input_semantic_index[input_index];
+ enum tgsi_interpolate_mode interp_mode = info->input_interpolate[input_index];
+ enum tgsi_interpolate_loc interp_loc = info->input_interpolate_loc[input_index];
 /* Get colors from input VGPRs (set by the prolog). */
- if (decl->Semantic.Name == TGSI_SEMANTIC_COLOR) {
- unsigned i = decl->Semantic.Index;
+ if (semantic_name == TGSI_SEMANTIC_COLOR) {
 unsigned colors_read = shader->selector->info.colors_read;
- unsigned mask = colors_read >> (i * 4);
+ unsigned mask = colors_read >> (semantic_index * 4);
 unsigned offset = SI_PARAM_POS_FIXED_PT + 1 +
- (i ? util_bitcount(colors_read & 0xf) : 0);
+ (semantic_index ? util_bitcount(colors_read & 0xf) : 0);
 out[0] = mask & 0x1 ? LLVMGetParam(main_fn, offset++) : base->undef;
 out[1] = mask & 0x2 ? LLVMGetParam(main_fn, offset++) : base->undef;
 return;
 }
- interp_param_idx = lookup_interp_param_index(decl->Interp.Interpolate,
- decl->Interp.Location);
+ interp_param_idx = lookup_interp_param_index(interp_mode, interp_loc);
 if (interp_param_idx == -1)
 return;
 else if (interp_param_idx) {
 interp_param = LLVMGetParam(ctx->main_fn, interp_param_idx);
 }
- interp_fs_input(ctx, input_index, decl->Semantic.Name,
- decl->Semantic.Index, 0, /* this param is unused */
+ interp_fs_input(ctx, input_index, semantic_name,
+ semantic_index, 0, /* this param is unused */
 shader->selector->info.colors_read, interp_param,
 LLVMGetParam(main_fn, SI_PARAM_PRIM_MASK),
 LLVMGetParam(main_fn, SI_PARAM_FRONT_FACE),
 &out[0]);
}
+/* TGSI input-declaration hook for the PS: the declaration (decl) is unused;
+ * all information is looked up via si_llvm_load_input_fs. */
+static void declare_input_fs(
+ struct si_shader_context *ctx,
+ unsigned input_index,
+ const struct tgsi_full_declaration *decl,
+ LLVMValueRef out[4])
+{
+ si_llvm_load_input_fs(ctx, input_index, out);
+}
+
static LLVMValueRef get_sample_id(struct si_shader_context *ctx)
{
return unpack_param(ctx, SI_PARAM_ANCILLARY, 8, 4);
switch (decl->Semantic.Name) {
case TGSI_SEMANTIC_INSTANCEID:
- value = LLVMGetParam(ctx->main_fn,
- ctx->param_instance_id);
+ value = ctx->abi.instance_id;
break;
case TGSI_SEMANTIC_VERTEXID:
value = LLVMBuildAdd(gallivm->builder,
- LLVMGetParam(ctx->main_fn,
- ctx->param_vertex_id),
- LLVMGetParam(ctx->main_fn,
- ctx->param_base_vertex), "");
+ ctx->abi.vertex_id,
+ ctx->abi.base_vertex, "");
break;
case TGSI_SEMANTIC_VERTEXID_NOBASE:
indexed = LLVMBuildTrunc(gallivm->builder, indexed, ctx->i1, "");
value = LLVMBuildSelect(gallivm->builder, indexed,
- LLVMGetParam(ctx->main_fn, ctx->param_base_vertex),
- ctx->i32_0, "");
+ ctx->abi.base_vertex, ctx->i32_0, "");
break;
}
case TGSI_SEMANTIC_BASEINSTANCE:
- value = LLVMGetParam(ctx->main_fn, ctx->param_start_instance);
+ value = ctx->abi.start_instance;
break;
case TGSI_SEMANTIC_DRAWID:
- value = LLVMGetParam(ctx->main_fn, ctx->param_draw_id);
+ value = ctx->abi.draw_id;
break;
case TGSI_SEMANTIC_INVOCATIONID:
}
case TGSI_SEMANTIC_PRIMID:
- value = get_primitive_id(&ctx->bld_base, 0);
+ value = get_primitive_id(ctx, 0);
break;
case TGSI_SEMANTIC_GRID_SIZE:
LLVMConstInt(ctx->i32, si_get_constbuf_slot(i), 0));
}
+/**
+ * ac_shader_abi hook: load the descriptor of the UBO at "index".
+ *
+ * Constant buffers live after the shader buffers in the same descriptor
+ * array, hence the SI_NUM_SHADER_BUFFERS offset added after bounding the
+ * index to num_const_buffers.
+ */
+static LLVMValueRef load_ubo(struct ac_shader_abi *abi, LLVMValueRef index)
+{
+ struct si_shader_context *ctx = si_shader_context_from_abi(abi);
+ LLVMValueRef ptr = LLVMGetParam(ctx->main_fn, ctx->param_const_and_shader_buffers);
+
+ index = si_llvm_bound_index(ctx, index, ctx->num_const_buffers);
+ index = LLVMBuildAdd(ctx->gallivm.builder, index,
+ LLVMConstInt(ctx->i32, SI_NUM_SHADER_BUFFERS, 0), "");
+
+ return ac_build_indexed_load_const(&ctx->ac, ptr, index);
+}
+
static LLVMValueRef fetch_constant(
struct lp_build_tgsi_context *bld_base,
const struct tgsi_full_src_register *reg,
static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
+ LLVMBuilderRef builder = ctx->gallivm.builder;
LLVMValueRef rel_patch_id, invocation_id, tf_lds_offset;
si_copy_tcs_inputs(bld_base);
invocation_id = unpack_param(ctx, ctx->param_tcs_rel_ids, 8, 5);
tf_lds_offset = get_tcs_out_current_patch_data_offset(ctx);
+ if (ctx->screen->b.chip_class >= GFX9) {
+ LLVMBasicBlockRef blocks[2] = {
+ LLVMGetInsertBlock(builder),
+ ctx->merged_wrap_if_state.entry_block
+ };
+ LLVMValueRef values[2];
+
+ lp_build_endif(&ctx->merged_wrap_if_state);
+
+ values[0] = rel_patch_id;
+ values[1] = LLVMGetUndef(ctx->i32);
+ rel_patch_id = build_phi(&ctx->ac, ctx->i32, 2, values, blocks);
+
+ values[0] = tf_lds_offset;
+ values[1] = LLVMGetUndef(ctx->i32);
+ tf_lds_offset = build_phi(&ctx->ac, ctx->i32, 2, values, blocks);
+
+ values[0] = invocation_id;
+ values[1] = ctx->i32_1; /* cause the epilog to skip threads */
+ invocation_id = build_phi(&ctx->ac, ctx->i32, 2, values, blocks);
+ }
+
/* Return epilog parameters from this function. */
- LLVMBuilderRef builder = ctx->gallivm.builder;
LLVMValueRef ret = ctx->return_value;
unsigned vgpr;
invocation_id = bitcast(bld_base, TGSI_TYPE_FLOAT, invocation_id);
tf_lds_offset = bitcast(bld_base, TGSI_TYPE_FLOAT, tf_lds_offset);
+ /* Leave a hole corresponding to the two input VGPRs. This ensures that
+ * the invocation_id output does not alias the param_tcs_rel_ids input,
+ * which saves a V_MOV on gfx9.
+ */
+ vgpr += 2;
+
ret = LLVMBuildInsertValue(builder, ret, rel_patch_id, vgpr++, "");
ret = LLVMBuildInsertValue(builder, ret, invocation_id, vgpr++, "");
ret = LLVMBuildInsertValue(builder, ret, tf_lds_offset, vgpr++, "");
ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_NOP | AC_SENDMSG_GS_DONE,
si_get_gs_wave_id(ctx));
+
+ if (ctx->screen->b.chip_class >= GFX9)
+ lp_build_endif(&ctx->merged_wrap_if_state);
}
-static void si_llvm_emit_vs_epilogue(struct lp_build_tgsi_context *bld_base)
+static void si_llvm_emit_vs_epilogue(struct ac_shader_abi *abi,
+ unsigned max_outputs,
+ LLVMValueRef *addrs)
{
- struct si_shader_context *ctx = si_shader_context(bld_base);
+ struct si_shader_context *ctx = si_shader_context_from_abi(abi);
struct gallivm_state *gallivm = &ctx->gallivm;
struct tgsi_shader_info *info = &ctx->shader->selector->info;
struct si_shader_output_values *outputs = NULL;
int i,j;
assert(!ctx->shader->is_gs_copy_shader);
+ assert(info->num_outputs <= max_outputs);
outputs = MALLOC((info->num_outputs + 1) * sizeof(outputs[0]));
}
for (j = 0; j < 4; j++) {
- addr = ctx->outputs[i][j];
+ addr = addrs[4 * i + j];
val = LLVMBuildLoad(gallivm->builder, addr, "");
val = ac_build_clamp(&ctx->ac, val);
LLVMBuildStore(gallivm->builder, val, addr);
for (j = 0; j < 4; j++) {
outputs[i].values[j] =
LLVMBuildLoad(gallivm->builder,
- ctx->outputs[i][j],
+ addrs[4 * i + j],
"");
outputs[i].vertex_stream[j] =
(info->output_streams[i] >> (2 * j)) & 3;
if (ctx->shader->key.mono.u.vs_export_prim_id) {
outputs[i].semantic_name = TGSI_SEMANTIC_PRIMID;
outputs[i].semantic_index = 0;
- outputs[i].values[0] = bitcast(bld_base, TGSI_TYPE_FLOAT,
- get_primitive_id(bld_base, 0));
+ outputs[i].values[0] = LLVMBuildBitCast(gallivm->builder,
+ get_primitive_id(ctx, 0), ctx->f32, "");
for (j = 1; j < 4; j++)
outputs[i].values[j] = LLVMConstReal(ctx->f32, 0);
i++;
}
- si_llvm_export_vs(bld_base, outputs, i);
+ si_llvm_export_vs(&ctx->bld_base, outputs, i);
FREE(outputs);
}
+/* TGSI epilogue hook: hand all output variable pointers to the ABI's
+ * emit_outputs implementation (shared with the NIR path via ctx->abi). */
+static void si_tgsi_emit_epilogue(struct lp_build_tgsi_context *bld_base)
+{
+ struct si_shader_context *ctx = si_shader_context(bld_base);
+
+ ctx->abi.emit_outputs(&ctx->abi, RADEON_LLVM_MAX_OUTPUTS,
+ &ctx->outputs[0][0]);
+}
+
struct si_ps_exports {
unsigned num;
struct ac_export_args args[10];
*
* The alpha-ref SGPR is returned via its original location.
*/
-static void si_llvm_return_fs_outputs(struct lp_build_tgsi_context *bld_base)
+static void si_llvm_return_fs_outputs(struct ac_shader_abi *abi,
+ unsigned max_outputs,
+ LLVMValueRef *addrs)
{
- struct si_shader_context *ctx = si_shader_context(bld_base);
+ struct si_shader_context *ctx = si_shader_context_from_abi(abi);
struct si_shader *shader = ctx->shader;
struct tgsi_shader_info *info = &shader->selector->info;
LLVMBuilderRef builder = ctx->gallivm.builder;
case TGSI_SEMANTIC_COLOR:
assert(semantic_index < 8);
for (j = 0; j < 4; j++) {
- LLVMValueRef ptr = ctx->outputs[i][j];
+ LLVMValueRef ptr = addrs[4 * i + j];
LLVMValueRef result = LLVMBuildLoad(builder, ptr, "");
color[semantic_index][j] = result;
}
break;
case TGSI_SEMANTIC_POSITION:
depth = LLVMBuildLoad(builder,
- ctx->outputs[i][2], "");
+ addrs[4 * i + 2], "");
break;
case TGSI_SEMANTIC_STENCIL:
stencil = LLVMBuildLoad(builder,
- ctx->outputs[i][1], "");
+ addrs[4 * i + 1], "");
break;
case TGSI_SEMANTIC_SAMPLEMASK:
samplemask = LLVMBuildLoad(builder,
- ctx->outputs[i][0], "");
+ addrs[4 * i + 0], "");
break;
default:
fprintf(stderr, "Warning: SI unhandled fs output type:%d\n",
/* Set SGPRs. */
ret = LLVMBuildInsertValue(builder, ret,
- bitcast(bld_base, TGSI_TYPE_SIGNED,
- LLVMGetParam(ctx->main_fn,
- SI_PARAM_ALPHA_REF)),
+ LLVMBuildBitCast(ctx->ac.builder,
+ LLVMGetParam(ctx->main_fn,
+ SI_PARAM_ALPHA_REF),
+ ctx->i32, ""),
SI_SGPR_ALPHA_REF, "");
/* Set VGPRs */
input_array_size = info->num_inputs - input_base;
}
- array_idx = get_indirect_index(ctx, &input->Indirect,
+ array_idx = si_get_indirect_index(ctx, &input->Indirect,
input->Register.Index - input_base);
} else {
input_base = inst->Src[0].Register.Index;
static void si_create_function(struct si_shader_context *ctx,
const char *name,
LLVMTypeRef *returns, unsigned num_returns,
- LLVMTypeRef *params, unsigned num_params,
- int last_sgpr, unsigned max_workgroup_size)
+ struct si_function_info *fninfo,
+ unsigned max_workgroup_size)
{
int i;
si_llvm_create_func(ctx, name, returns, num_returns,
- params, num_params);
+ fninfo->types, fninfo->num_params);
ctx->return_value = LLVMGetUndef(ctx->return_type);
- for (i = 0; i <= last_sgpr; ++i) {
+ for (i = 0; i < fninfo->num_sgpr_params; ++i) {
LLVMValueRef P = LLVMGetParam(ctx->main_fn, i);
/* The combination of:
lp_add_function_attr(ctx->main_fn, i + 1, LP_FUNC_ATTR_INREG);
}
+ for (i = 0; i < fninfo->num_params; ++i) {
+ if (fninfo->assign[i])
+ *fninfo->assign[i] = LLVMGetParam(ctx->main_fn, i);
+ }
+
if (max_workgroup_size) {
si_llvm_add_attribute(ctx->main_fn, "amdgpu-max-work-group-size",
max_workgroup_size);
+/* Declare the streamout (transform feedback) SGPR arguments, if the shader
+ * writes any streamout outputs. */
static void declare_streamout_params(struct si_shader_context *ctx,
 struct pipe_stream_output_info *so,
- LLVMTypeRef *params, LLVMTypeRef i32,
- unsigned *num_params)
+ struct si_function_info *fninfo)
{
 int i;
 /* Streamout SGPRs. */
 if (so->num_outputs) {
 if (ctx->type != PIPE_SHADER_TESS_EVAL)
- params[ctx->param_streamout_config = (*num_params)++] = i32;
+ ctx->param_streamout_config = add_arg(fninfo, ARG_SGPR, ctx->ac.i32);
 else
+ /* TES reuses the previously declared SGPR for the config. */
- ctx->param_streamout_config = *num_params - 1;
+ ctx->param_streamout_config = fninfo->num_params - 1;
- params[ctx->param_streamout_write_index = (*num_params)++] = i32;
+ ctx->param_streamout_write_index = add_arg(fninfo, ARG_SGPR, ctx->ac.i32);
 }
 /* A streamout buffer offset is loaded if the stride is non-zero. */
 for (i = 0; i < 4; i++) {
 if (!so->stride[i])
 continue;
+ /* NOTE(review): ctx->i32 here vs. ctx->ac.i32 above — presumably the
+  * same type; confirm and unify. */
- params[ctx->param_streamout_offset[i] = (*num_params)++] = i32;
+ ctx->param_streamout_offset[i] = add_arg(fninfo, ARG_SGPR, ctx->i32);
 }
 }
}
+/* Declare the two per-stage descriptor-array SGPR arguments
+ * (const/shader buffers and samplers/images); record their parameter
+ * indices in ctx only when assign_params is set (merged shaders declare
+ * these for both stages but use only one set). */
static void declare_per_stage_desc_pointers(struct si_shader_context *ctx,
- LLVMTypeRef *params,
- unsigned *num_params,
+ struct si_function_info *fninfo,
 bool assign_params)
{
- params[(*num_params)++] = si_const_array(ctx->v4i32,
- SI_NUM_SHADER_BUFFERS + SI_NUM_CONST_BUFFERS);
- params[(*num_params)++] = si_const_array(ctx->v8i32,
- SI_NUM_IMAGES + SI_NUM_SAMPLERS * 2);
+ unsigned const_and_shader_buffers =
+ add_arg(fninfo, ARG_SGPR,
+ si_const_array(ctx->v4i32,
+ SI_NUM_SHADER_BUFFERS + SI_NUM_CONST_BUFFERS));
+ unsigned samplers_and_images =
+ add_arg(fninfo, ARG_SGPR,
+ si_const_array(ctx->v8i32,
+ SI_NUM_IMAGES + SI_NUM_SAMPLERS * 2));
 if (assign_params) {
- ctx->param_const_and_shader_buffers = *num_params - 2;
- ctx->param_samplers_and_images = *num_params - 1;
+ ctx->param_const_and_shader_buffers = const_and_shader_buffers;
+ ctx->param_samplers_and_images = samplers_and_images;
 }
}
+/* Declare the descriptor pointers common to non-merged shaders: the RW
+ * buffer array plus the per-stage descriptor arrays (always assigned). */
static void declare_default_desc_pointers(struct si_shader_context *ctx,
- LLVMTypeRef *params,
- unsigned *num_params)
+ struct si_function_info *fninfo)
{
- params[ctx->param_rw_buffers = (*num_params)++] =
- si_const_array(ctx->v4i32, SI_NUM_RW_BUFFERS);
- declare_per_stage_desc_pointers(ctx, params, num_params, true);
+ ctx->param_rw_buffers = add_arg(fninfo, ARG_SGPR,
+ si_const_array(ctx->v4i32, SI_NUM_RW_BUFFERS));
+ declare_per_stage_desc_pointers(ctx, fninfo, true);
}
+/* Declare the VS-specific input SGPRs. base_vertex, start_instance and
+ * draw_id are now delivered through ctx->abi (via add_arg_assign) instead
+ * of ctx->param_* indices. */
static void declare_vs_specific_input_sgprs(struct si_shader_context *ctx,
- LLVMTypeRef *params,
- unsigned *num_params)
+ struct si_function_info *fninfo)
{
- params[ctx->param_vertex_buffers = (*num_params)++] =
- si_const_array(ctx->v4i32, SI_NUM_VERTEX_BUFFERS);
- params[ctx->param_base_vertex = (*num_params)++] = ctx->i32;
- params[ctx->param_start_instance = (*num_params)++] = ctx->i32;
- params[ctx->param_draw_id = (*num_params)++] = ctx->i32;
- params[ctx->param_vs_state_bits = (*num_params)++] = ctx->i32;
+ ctx->param_vertex_buffers = add_arg(fninfo, ARG_SGPR,
+ si_const_array(ctx->v4i32, SI_NUM_VERTEX_BUFFERS));
+ add_arg_assign(fninfo, ARG_SGPR, ctx->i32, &ctx->abi.base_vertex);
+ add_arg_assign(fninfo, ARG_SGPR, ctx->i32, &ctx->abi.start_instance);
+ add_arg_assign(fninfo, ARG_SGPR, ctx->i32, &ctx->abi.draw_id);
+ ctx->param_vs_state_bits = add_arg(fninfo, ARG_SGPR, ctx->i32);
}
+/* Declare the VS input VGPRs. vertex_id and instance_id are delivered via
+ * ctx->abi; the VGPR order differs between the LS (as_ls) and plain-VS
+ * cases. Vertex load indices (one VGPR per input) are appended for
+ * non-GS-copy shaders and counted in *num_prolog_vgprs. */
static void declare_vs_input_vgprs(struct si_shader_context *ctx,
- LLVMTypeRef *params, unsigned *num_params,
+ struct si_function_info *fninfo,
 unsigned *num_prolog_vgprs)
{
 struct si_shader *shader = ctx->shader;
- params[ctx->param_vertex_id = (*num_params)++] = ctx->i32;
+ add_arg_assign(fninfo, ARG_VGPR, ctx->i32, &ctx->abi.vertex_id);
 if (shader->key.as_ls) {
- params[ctx->param_rel_auto_id = (*num_params)++] = ctx->i32;
- params[ctx->param_instance_id = (*num_params)++] = ctx->i32;
+ ctx->param_rel_auto_id = add_arg(fninfo, ARG_VGPR, ctx->i32);
+ add_arg_assign(fninfo, ARG_VGPR, ctx->i32, &ctx->abi.instance_id);
 } else {
- params[ctx->param_instance_id = (*num_params)++] = ctx->i32;
- params[ctx->param_vs_prim_id = (*num_params)++] = ctx->i32;
+ add_arg_assign(fninfo, ARG_VGPR, ctx->i32, &ctx->abi.instance_id);
+ ctx->param_vs_prim_id = add_arg(fninfo, ARG_VGPR, ctx->i32);
 }
- params[(*num_params)++] = ctx->i32; /* unused */
+ add_arg(fninfo, ARG_VGPR, ctx->i32); /* unused */
 if (!shader->is_gs_copy_shader) {
 /* Vertex load indices. */
- ctx->param_vertex_index0 = (*num_params);
+ ctx->param_vertex_index0 = fninfo->num_params;
 for (unsigned i = 0; i < shader->selector->info.num_inputs; i++)
- params[(*num_params)++] = ctx->i32;
+ add_arg(fninfo, ARG_VGPR, ctx->i32);
 *num_prolog_vgprs += shader->selector->info.num_inputs;
 }
}
+/* Declare the four TES input VGPRs: the tess coord (u, v), the relative
+ * patch id, and the patch id. */
static void declare_tes_input_vgprs(struct si_shader_context *ctx,
- LLVMTypeRef *params, unsigned *num_params)
+ struct si_function_info *fninfo)
{
- params[ctx->param_tes_u = (*num_params)++] = ctx->f32;
- params[ctx->param_tes_v = (*num_params)++] = ctx->f32;
- params[ctx->param_tes_rel_patch_id = (*num_params)++] = ctx->i32;
- params[ctx->param_tes_patch_id = (*num_params)++] = ctx->i32;
+ ctx->param_tes_u = add_arg(fninfo, ARG_VGPR, ctx->f32);
+ ctx->param_tes_v = add_arg(fninfo, ARG_VGPR, ctx->f32);
+ ctx->param_tes_rel_patch_id = add_arg(fninfo, ARG_VGPR, ctx->i32);
+ ctx->param_tes_patch_id = add_arg(fninfo, ARG_VGPR, ctx->i32);
}
enum {
struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
struct gallivm_state *gallivm = &ctx->gallivm;
struct si_shader *shader = ctx->shader;
- LLVMTypeRef params[100]; /* just make it large enough */
+ struct si_function_info fninfo;
LLVMTypeRef returns[16+32*4];
- unsigned i, last_sgpr, num_params = 0, num_return_sgprs;
+ unsigned i, num_return_sgprs;
unsigned num_returns = 0;
unsigned num_prolog_vgprs = 0;
unsigned type = ctx->type;
+ si_init_function_info(&fninfo);
+
/* Set MERGED shaders. */
if (ctx->screen->b.chip_class >= GFX9) {
if (shader->key.as_ls || type == PIPE_SHADER_TESS_CTRL)
switch (type) {
case PIPE_SHADER_VERTEX:
- declare_default_desc_pointers(ctx, params, &num_params);
- declare_vs_specific_input_sgprs(ctx, params, &num_params);
+ declare_default_desc_pointers(ctx, &fninfo);
+ declare_vs_specific_input_sgprs(ctx, &fninfo);
if (shader->key.as_es) {
- params[ctx->param_es2gs_offset = num_params++] = ctx->i32;
+ ctx->param_es2gs_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32);
} else if (shader->key.as_ls) {
/* no extra parameters */
} else {
- if (shader->is_gs_copy_shader)
- num_params = ctx->param_rw_buffers + 1;
+ if (shader->is_gs_copy_shader) {
+ fninfo.num_params = ctx->param_rw_buffers + 1;
+ fninfo.num_sgpr_params = fninfo.num_params;
+ }
/* The locations of the other parameters are assigned dynamically. */
declare_streamout_params(ctx, &shader->selector->so,
- params, ctx->i32, &num_params);
+ &fninfo);
}
- last_sgpr = num_params-1;
-
/* VGPRs */
- declare_vs_input_vgprs(ctx, params, &num_params,
- &num_prolog_vgprs);
+ declare_vs_input_vgprs(ctx, &fninfo, &num_prolog_vgprs);
break;
case PIPE_SHADER_TESS_CTRL: /* SI-CI-VI */
- declare_default_desc_pointers(ctx, params, &num_params);
- params[ctx->param_tcs_offchip_layout = num_params++] = ctx->i32;
- params[ctx->param_tcs_out_lds_offsets = num_params++] = ctx->i32;
- params[ctx->param_tcs_out_lds_layout = num_params++] = ctx->i32;
- params[ctx->param_vs_state_bits = num_params++] = ctx->i32;
- params[ctx->param_tcs_offchip_addr_base64k = num_params++] = ctx->i32;
- params[ctx->param_tcs_factor_addr_base64k = num_params++] = ctx->i32;
- params[ctx->param_tcs_offchip_offset = num_params++] = ctx->i32;
- params[ctx->param_tcs_factor_offset = num_params++] = ctx->i32;
- last_sgpr = num_params - 1;
+ declare_default_desc_pointers(ctx, &fninfo);
+ ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ ctx->param_tcs_out_lds_offsets = add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ ctx->param_tcs_out_lds_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ ctx->param_vs_state_bits = add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ ctx->param_tcs_offchip_addr_base64k = add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ ctx->param_tcs_factor_addr_base64k = add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ ctx->param_tcs_offchip_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ ctx->param_tcs_factor_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32);
/* VGPRs */
- params[ctx->param_tcs_patch_id = num_params++] = ctx->i32;
- params[ctx->param_tcs_rel_ids = num_params++] = ctx->i32;
+ ctx->param_tcs_patch_id = add_arg(&fninfo, ARG_VGPR, ctx->i32);
+ ctx->param_tcs_rel_ids = add_arg(&fninfo, ARG_VGPR, ctx->i32);
/* param_tcs_offchip_offset and param_tcs_factor_offset are
* placed after the user SGPRs.
*/
for (i = 0; i < GFX6_TCS_NUM_USER_SGPR + 2; i++)
returns[num_returns++] = ctx->i32; /* SGPRs */
- for (i = 0; i < 3; i++)
+ for (i = 0; i < 5; i++)
returns[num_returns++] = ctx->f32; /* VGPRs */
break;
case SI_SHADER_MERGED_VERTEX_TESSCTRL:
/* Merged stages have 8 system SGPRs at the beginning. */
- params[ctx->param_rw_buffers = num_params++] = /* SPI_SHADER_USER_DATA_ADDR_LO_HS */
- si_const_array(ctx->v4i32, SI_NUM_RW_BUFFERS);
- params[ctx->param_tcs_offchip_offset = num_params++] = ctx->i32;
- params[ctx->param_merged_wave_info = num_params++] = ctx->i32;
- params[ctx->param_tcs_factor_offset = num_params++] = ctx->i32;
- params[ctx->param_merged_scratch_offset = num_params++] = ctx->i32;
- params[num_params++] = ctx->i32; /* unused */
- params[num_params++] = ctx->i32; /* unused */
-
- params[num_params++] = ctx->i32; /* unused */
- params[num_params++] = ctx->i32; /* unused */
- declare_per_stage_desc_pointers(ctx, params, &num_params,
+ ctx->param_rw_buffers = /* SPI_SHADER_USER_DATA_ADDR_LO_HS */
+ add_arg(&fninfo, ARG_SGPR, si_const_array(ctx->v4i32, SI_NUM_RW_BUFFERS));
+ ctx->param_tcs_offchip_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ ctx->param_merged_wave_info = add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ ctx->param_tcs_factor_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ ctx->param_merged_scratch_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */
+ add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */
+
+ add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */
+ add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */
+ declare_per_stage_desc_pointers(ctx, &fninfo,
ctx->type == PIPE_SHADER_VERTEX);
- declare_vs_specific_input_sgprs(ctx, params, &num_params);
+ declare_vs_specific_input_sgprs(ctx, &fninfo);
- params[ctx->param_tcs_offchip_layout = num_params++] = ctx->i32;
- params[ctx->param_tcs_out_lds_offsets = num_params++] = ctx->i32;
- params[ctx->param_tcs_out_lds_layout = num_params++] = ctx->i32;
- params[ctx->param_tcs_offchip_addr_base64k = num_params++] = ctx->i32;
- params[ctx->param_tcs_factor_addr_base64k = num_params++] = ctx->i32;
- params[num_params++] = ctx->i32; /* unused */
+ ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ ctx->param_tcs_out_lds_offsets = add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ ctx->param_tcs_out_lds_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ ctx->param_tcs_offchip_addr_base64k = add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ ctx->param_tcs_factor_addr_base64k = add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */
- declare_per_stage_desc_pointers(ctx, params, &num_params,
+ declare_per_stage_desc_pointers(ctx, &fninfo,
ctx->type == PIPE_SHADER_TESS_CTRL);
- last_sgpr = num_params - 1;
/* VGPRs (first TCS, then VS) */
- params[ctx->param_tcs_patch_id = num_params++] = ctx->i32;
- params[ctx->param_tcs_rel_ids = num_params++] = ctx->i32;
+ ctx->param_tcs_patch_id = add_arg(&fninfo, ARG_VGPR, ctx->i32);
+ ctx->param_tcs_rel_ids = add_arg(&fninfo, ARG_VGPR, ctx->i32);
if (ctx->type == PIPE_SHADER_VERTEX) {
- declare_vs_input_vgprs(ctx, params, &num_params,
+ declare_vs_input_vgprs(ctx, &fninfo,
&num_prolog_vgprs);
/* LS return values are inputs to the TCS main shader part. */
*/
for (i = 0; i <= 8 + GFX9_SGPR_TCS_FACTOR_ADDR_BASE64K; i++)
returns[num_returns++] = ctx->i32; /* SGPRs */
- for (i = 0; i < 3; i++)
+ for (i = 0; i < 5; i++)
returns[num_returns++] = ctx->f32; /* VGPRs */
}
break;
case SI_SHADER_MERGED_VERTEX_OR_TESSEVAL_GEOMETRY:
/* Merged stages have 8 system SGPRs at the beginning. */
- params[ctx->param_rw_buffers = num_params++] = /* SPI_SHADER_USER_DATA_ADDR_LO_GS */
- si_const_array(ctx->v4i32, SI_NUM_RW_BUFFERS);
- params[ctx->param_gs2vs_offset = num_params++] = ctx->i32;
- params[ctx->param_merged_wave_info = num_params++] = ctx->i32;
- params[ctx->param_tcs_offchip_offset = num_params++] = ctx->i32;
- params[ctx->param_merged_scratch_offset = num_params++] = ctx->i32;
- params[num_params++] = ctx->i32; /* unused (SPI_SHADER_PGM_LO/HI_GS << 8) */
- params[num_params++] = ctx->i32; /* unused (SPI_SHADER_PGM_LO/HI_GS >> 24) */
-
- params[num_params++] = ctx->i32; /* unused */
- params[num_params++] = ctx->i32; /* unused */
- declare_per_stage_desc_pointers(ctx, params, &num_params,
+ ctx->param_rw_buffers = /* SPI_SHADER_USER_DATA_ADDR_LO_GS */
+ add_arg(&fninfo, ARG_SGPR, si_const_array(ctx->v4i32, SI_NUM_RW_BUFFERS));
+ ctx->param_gs2vs_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ ctx->param_merged_wave_info = add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ ctx->param_tcs_offchip_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ ctx->param_merged_scratch_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused (SPI_SHADER_PGM_LO/HI_GS << 8) */
+ add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused (SPI_SHADER_PGM_LO/HI_GS >> 24) */
+
+ add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */
+ add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */
+ declare_per_stage_desc_pointers(ctx, &fninfo,
(ctx->type == PIPE_SHADER_VERTEX ||
ctx->type == PIPE_SHADER_TESS_EVAL));
if (ctx->type == PIPE_SHADER_VERTEX) {
- declare_vs_specific_input_sgprs(ctx, params, &num_params);
+ declare_vs_specific_input_sgprs(ctx, &fninfo);
} else {
/* TESS_EVAL (and also GEOMETRY):
* Declare as many input SGPRs as the VS has. */
- params[ctx->param_tcs_offchip_layout = num_params++] = ctx->i32;
- params[ctx->param_tcs_offchip_addr_base64k = num_params++] = ctx->i32;
- params[num_params++] = ctx->i32; /* unused */
- params[num_params++] = ctx->i32; /* unused */
- params[num_params++] = ctx->i32; /* unused */
- params[ctx->param_vs_state_bits = num_params++] = ctx->i32; /* unused */
+ ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ ctx->param_tcs_offchip_addr_base64k = add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */
+ add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */
+ add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */
+ ctx->param_vs_state_bits = add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */
}
- declare_per_stage_desc_pointers(ctx, params, &num_params,
+ declare_per_stage_desc_pointers(ctx, &fninfo,
ctx->type == PIPE_SHADER_GEOMETRY);
- last_sgpr = num_params - 1;
/* VGPRs (first GS, then VS/TES) */
- params[ctx->param_gs_vtx01_offset = num_params++] = ctx->i32;
- params[ctx->param_gs_vtx23_offset = num_params++] = ctx->i32;
- params[ctx->param_gs_prim_id = num_params++] = ctx->i32;
- params[ctx->param_gs_instance_id = num_params++] = ctx->i32;
- params[ctx->param_gs_vtx45_offset = num_params++] = ctx->i32;
+ ctx->param_gs_vtx01_offset = add_arg(&fninfo, ARG_VGPR, ctx->i32);
+ ctx->param_gs_vtx23_offset = add_arg(&fninfo, ARG_VGPR, ctx->i32);
+ ctx->param_gs_prim_id = add_arg(&fninfo, ARG_VGPR, ctx->i32);
+ ctx->param_gs_instance_id = add_arg(&fninfo, ARG_VGPR, ctx->i32);
+ ctx->param_gs_vtx45_offset = add_arg(&fninfo, ARG_VGPR, ctx->i32);
if (ctx->type == PIPE_SHADER_VERTEX) {
- declare_vs_input_vgprs(ctx, params, &num_params,
+ declare_vs_input_vgprs(ctx, &fninfo,
&num_prolog_vgprs);
} else if (ctx->type == PIPE_SHADER_TESS_EVAL) {
- declare_tes_input_vgprs(ctx, params, &num_params);
+ declare_tes_input_vgprs(ctx, &fninfo);
}
if (ctx->type == PIPE_SHADER_VERTEX ||
break;
case PIPE_SHADER_TESS_EVAL:
- declare_default_desc_pointers(ctx, params, &num_params);
- params[ctx->param_tcs_offchip_layout = num_params++] = ctx->i32;
- params[ctx->param_tcs_offchip_addr_base64k = num_params++] = ctx->i32;
+ declare_default_desc_pointers(ctx, &fninfo);
+ ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ ctx->param_tcs_offchip_addr_base64k = add_arg(&fninfo, ARG_SGPR, ctx->i32);
if (shader->key.as_es) {
- params[ctx->param_tcs_offchip_offset = num_params++] = ctx->i32;
- params[num_params++] = ctx->i32;
- params[ctx->param_es2gs_offset = num_params++] = ctx->i32;
+ ctx->param_tcs_offchip_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ ctx->param_es2gs_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32);
} else {
- params[num_params++] = ctx->i32;
+ add_arg(&fninfo, ARG_SGPR, ctx->i32);
declare_streamout_params(ctx, &shader->selector->so,
- params, ctx->i32, &num_params);
- params[ctx->param_tcs_offchip_offset = num_params++] = ctx->i32;
+ &fninfo);
+ ctx->param_tcs_offchip_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32);
}
- last_sgpr = num_params - 1;
/* VGPRs */
- declare_tes_input_vgprs(ctx, params, &num_params);
+ declare_tes_input_vgprs(ctx, &fninfo);
break;
case PIPE_SHADER_GEOMETRY:
- declare_default_desc_pointers(ctx, params, &num_params);
- params[ctx->param_gs2vs_offset = num_params++] = ctx->i32;
- params[ctx->param_gs_wave_id = num_params++] = ctx->i32;
- last_sgpr = num_params - 1;
+ declare_default_desc_pointers(ctx, &fninfo);
+ ctx->param_gs2vs_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ ctx->param_gs_wave_id = add_arg(&fninfo, ARG_SGPR, ctx->i32);
/* VGPRs */
- params[ctx->param_gs_vtx0_offset = num_params++] = ctx->i32;
- params[ctx->param_gs_vtx1_offset = num_params++] = ctx->i32;
- params[ctx->param_gs_prim_id = num_params++] = ctx->i32;
- params[ctx->param_gs_vtx2_offset = num_params++] = ctx->i32;
- params[ctx->param_gs_vtx3_offset = num_params++] = ctx->i32;
- params[ctx->param_gs_vtx4_offset = num_params++] = ctx->i32;
- params[ctx->param_gs_vtx5_offset = num_params++] = ctx->i32;
- params[ctx->param_gs_instance_id = num_params++] = ctx->i32;
+ ctx->param_gs_vtx0_offset = add_arg(&fninfo, ARG_VGPR, ctx->i32);
+ ctx->param_gs_vtx1_offset = add_arg(&fninfo, ARG_VGPR, ctx->i32);
+ ctx->param_gs_prim_id = add_arg(&fninfo, ARG_VGPR, ctx->i32);
+ ctx->param_gs_vtx2_offset = add_arg(&fninfo, ARG_VGPR, ctx->i32);
+ ctx->param_gs_vtx3_offset = add_arg(&fninfo, ARG_VGPR, ctx->i32);
+ ctx->param_gs_vtx4_offset = add_arg(&fninfo, ARG_VGPR, ctx->i32);
+ ctx->param_gs_vtx5_offset = add_arg(&fninfo, ARG_VGPR, ctx->i32);
+ ctx->param_gs_instance_id = add_arg(&fninfo, ARG_VGPR, ctx->i32);
break;
case PIPE_SHADER_FRAGMENT:
- declare_default_desc_pointers(ctx, params, &num_params);
- params[SI_PARAM_ALPHA_REF] = ctx->f32;
- params[SI_PARAM_PRIM_MASK] = ctx->i32;
- last_sgpr = SI_PARAM_PRIM_MASK;
- params[SI_PARAM_PERSP_SAMPLE] = ctx->v2i32;
- params[SI_PARAM_PERSP_CENTER] = ctx->v2i32;
- params[SI_PARAM_PERSP_CENTROID] = ctx->v2i32;
- params[SI_PARAM_PERSP_PULL_MODEL] = v3i32;
- params[SI_PARAM_LINEAR_SAMPLE] = ctx->v2i32;
- params[SI_PARAM_LINEAR_CENTER] = ctx->v2i32;
- params[SI_PARAM_LINEAR_CENTROID] = ctx->v2i32;
- params[SI_PARAM_LINE_STIPPLE_TEX] = ctx->f32;
- params[SI_PARAM_POS_X_FLOAT] = ctx->f32;
- params[SI_PARAM_POS_Y_FLOAT] = ctx->f32;
- params[SI_PARAM_POS_Z_FLOAT] = ctx->f32;
- params[SI_PARAM_POS_W_FLOAT] = ctx->f32;
- params[SI_PARAM_FRONT_FACE] = ctx->i32;
+ declare_default_desc_pointers(ctx, &fninfo);
+ add_arg_checked(&fninfo, ARG_SGPR, ctx->f32, SI_PARAM_ALPHA_REF);
+ add_arg_checked(&fninfo, ARG_SGPR, ctx->i32, SI_PARAM_PRIM_MASK);
+
+ add_arg_checked(&fninfo, ARG_VGPR, ctx->v2i32, SI_PARAM_PERSP_SAMPLE);
+ add_arg_checked(&fninfo, ARG_VGPR, ctx->v2i32, SI_PARAM_PERSP_CENTER);
+ add_arg_checked(&fninfo, ARG_VGPR, ctx->v2i32, SI_PARAM_PERSP_CENTROID);
+ add_arg_checked(&fninfo, ARG_VGPR, v3i32, SI_PARAM_PERSP_PULL_MODEL);
+ add_arg_checked(&fninfo, ARG_VGPR, ctx->v2i32, SI_PARAM_LINEAR_SAMPLE);
+ add_arg_checked(&fninfo, ARG_VGPR, ctx->v2i32, SI_PARAM_LINEAR_CENTER);
+ add_arg_checked(&fninfo, ARG_VGPR, ctx->v2i32, SI_PARAM_LINEAR_CENTROID);
+ add_arg_checked(&fninfo, ARG_VGPR, ctx->f32, SI_PARAM_LINE_STIPPLE_TEX);
+ add_arg_checked(&fninfo, ARG_VGPR, ctx->f32, SI_PARAM_POS_X_FLOAT);
+ add_arg_checked(&fninfo, ARG_VGPR, ctx->f32, SI_PARAM_POS_Y_FLOAT);
+ add_arg_checked(&fninfo, ARG_VGPR, ctx->f32, SI_PARAM_POS_Z_FLOAT);
+ add_arg_checked(&fninfo, ARG_VGPR, ctx->f32, SI_PARAM_POS_W_FLOAT);
+ add_arg_checked(&fninfo, ARG_VGPR, ctx->i32, SI_PARAM_FRONT_FACE);
shader->info.face_vgpr_index = 20;
- params[SI_PARAM_ANCILLARY] = ctx->i32;
- params[SI_PARAM_SAMPLE_COVERAGE] = ctx->f32;
- params[SI_PARAM_POS_FIXED_PT] = ctx->i32;
- num_params = SI_PARAM_POS_FIXED_PT+1;
+ add_arg_checked(&fninfo, ARG_VGPR, ctx->i32, SI_PARAM_ANCILLARY);
+ add_arg_checked(&fninfo, ARG_VGPR, ctx->f32, SI_PARAM_SAMPLE_COVERAGE);
+ add_arg_checked(&fninfo, ARG_VGPR, ctx->i32, SI_PARAM_POS_FIXED_PT);
/* Color inputs from the prolog. */
if (shader->selector->info.colors_read) {
unsigned num_color_elements =
util_bitcount(shader->selector->info.colors_read);
- assert(num_params + num_color_elements <= ARRAY_SIZE(params));
+ assert(fninfo.num_params + num_color_elements <= ARRAY_SIZE(fninfo.types));
for (i = 0; i < num_color_elements; i++)
- params[num_params++] = ctx->f32;
+ add_arg(&fninfo, ARG_VGPR, ctx->f32);
num_prolog_vgprs += num_color_elements;
}
break;
case PIPE_SHADER_COMPUTE:
- declare_default_desc_pointers(ctx, params, &num_params);
+ declare_default_desc_pointers(ctx, &fninfo);
if (shader->selector->info.uses_grid_size)
- params[ctx->param_grid_size = num_params++] = v3i32;
+ ctx->param_grid_size = add_arg(&fninfo, ARG_SGPR, v3i32);
if (shader->selector->info.uses_block_size)
- params[ctx->param_block_size = num_params++] = v3i32;
+ ctx->param_block_size = add_arg(&fninfo, ARG_SGPR, v3i32);
for (i = 0; i < 3; i++) {
ctx->param_block_id[i] = -1;
if (shader->selector->info.uses_block_id[i])
- params[ctx->param_block_id[i] = num_params++] = ctx->i32;
+ ctx->param_block_id[i] = add_arg(&fninfo, ARG_SGPR, ctx->i32);
}
- last_sgpr = num_params - 1;
- params[ctx->param_thread_id = num_params++] = v3i32;
+ ctx->param_thread_id = add_arg(&fninfo, ARG_VGPR, v3i32);
break;
default:
assert(0 && "unimplemented shader");
return;
}
- assert(num_params <= ARRAY_SIZE(params));
-
- si_create_function(ctx, "main", returns, num_returns, params,
- num_params, last_sgpr,
+ si_create_function(ctx, "main", returns, num_returns, &fninfo,
si_get_max_workgroup_size(shader));
/* Reserve register locations for VGPR inputs the PS prolog may need. */
shader->info.num_input_sgprs = 0;
shader->info.num_input_vgprs = 0;
- for (i = 0; i <= last_sgpr; ++i)
- shader->info.num_input_sgprs += llvm_get_type_size(params[i]) / 4;
+ for (i = 0; i < fninfo.num_sgpr_params; ++i)
+ shader->info.num_input_sgprs += llvm_get_type_size(fninfo.types[i]) / 4;
- for (; i < num_params; ++i)
- shader->info.num_input_vgprs += llvm_get_type_size(params[i]) / 4;
+ for (; i < fninfo.num_params; ++i)
+ shader->info.num_input_vgprs += llvm_get_type_size(fninfo.types[i]) / 4;
assert(shader->info.num_input_vgprs >= num_prolog_vgprs);
shader->info.num_input_vgprs -= num_prolog_vgprs;
si_dump_shader_key(processor, shader, file);
if (!check_debug_option && shader->binary.llvm_ir_string) {
+ if (shader->previous_stage &&
+ shader->previous_stage->binary.llvm_ir_string) {
+ fprintf(file, "\n%s - previous stage - LLVM IR:\n\n",
+ si_get_shader_name(shader, processor));
+ fprintf(file, "%s\n", shader->previous_stage->binary.llvm_ir_string);
+ }
+
fprintf(file, "\n%s - main shader part - LLVM IR:\n\n",
si_get_shader_name(shader, processor));
fprintf(file, "%s\n", shader->binary.llvm_ir_string);
preload_ring_buffers(&ctx);
LLVMValueRef voffset =
- lp_build_mul_imm(uint, LLVMGetParam(ctx.main_fn,
- ctx.param_vertex_id), 4);
+ lp_build_mul_imm(uint, ctx.abi.vertex_id, 4);
/* Fetch the vertex stream ID.*/
LLVMValueRef stream_id;
{
struct lp_build_tgsi_context *bld_base;
+ ctx->abi.chip_class = sscreen->b.chip_class;
+
si_llvm_context_init(ctx, sscreen, tm);
bld_base = &ctx->bld_base;
struct si_shader_selector *sel = shader->selector;
struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
+ // TODO clean all this up!
switch (ctx->type) {
case PIPE_SHADER_VERTEX:
ctx->load_input = declare_input_vs;
bld_base->emit_epilogue = si_llvm_emit_ls_epilogue;
else if (shader->key.as_es)
bld_base->emit_epilogue = si_llvm_emit_es_epilogue;
- else
- bld_base->emit_epilogue = si_llvm_emit_vs_epilogue;
+ else {
+ ctx->abi.emit_outputs = si_llvm_emit_vs_epilogue;
+ bld_base->emit_epilogue = si_tgsi_emit_epilogue;
+ }
break;
case PIPE_SHADER_TESS_CTRL:
bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = fetch_input_tcs;
bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = fetch_input_tes;
if (shader->key.as_es)
bld_base->emit_epilogue = si_llvm_emit_es_epilogue;
- else
- bld_base->emit_epilogue = si_llvm_emit_vs_epilogue;
+ else {
+ ctx->abi.emit_outputs = si_llvm_emit_vs_epilogue;
+ bld_base->emit_epilogue = si_tgsi_emit_epilogue;
+ }
break;
case PIPE_SHADER_GEOMETRY:
bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = fetch_input_gs;
break;
case PIPE_SHADER_FRAGMENT:
ctx->load_input = declare_input_fs;
- bld_base->emit_epilogue = si_llvm_return_fs_outputs;
+ ctx->abi.emit_outputs = si_llvm_return_fs_outputs;
+ bld_base->emit_epilogue = si_tgsi_emit_epilogue;
break;
case PIPE_SHADER_COMPUTE:
ctx->declare_memory_region = declare_compute_memory;
return false;
}
+ ctx->abi.load_ubo = load_ubo;
+
create_function(ctx);
preload_ring_buffers(ctx);
/* For GFX9 merged shaders:
- * - Set EXEC. If the prolog is present, set EXEC there instead.
+ * - Set EXEC for the first shader. If the prolog is present, set
+ * EXEC there instead.
* - Add a barrier before the second shader.
+ * - In the second shader, reset EXEC to ~0 and wrap the main part in
+ * an if-statement. This is required for correctness in geometry
+ * shaders, to ensure that empty GS waves do not send GS_EMIT and
+ * GS_CUT messages.
*
- * The same thing for monolithic shaders is done in
- * si_build_wrapper_function.
+ * For monolithic merged shaders, the first shader is wrapped in an
+ * if-block together with its prolog in si_build_wrapper_function.
*/
- if (ctx->screen->b.chip_class >= GFX9 && !is_monolithic) {
- if (sel->info.num_instructions > 1 && /* not empty shader */
+ if (ctx->screen->b.chip_class >= GFX9) {
+ if (!is_monolithic &&
+ sel->info.num_instructions > 1 && /* not empty shader */
(shader->key.as_es || shader->key.as_ls) &&
(ctx->type == PIPE_SHADER_TESS_EVAL ||
(ctx->type == PIPE_SHADER_VERTEX &&
ctx->param_merged_wave_info, 0);
} else if (ctx->type == PIPE_SHADER_TESS_CTRL ||
ctx->type == PIPE_SHADER_GEOMETRY) {
- si_init_exec_from_input(ctx,
- ctx->param_merged_wave_info, 8);
+ if (!is_monolithic)
+ si_init_exec_full_mask(ctx);
+
+ /* The barrier must execute for all shaders in a
+ * threadgroup.
+ */
si_llvm_emit_barrier(NULL, bld_base, NULL);
+
+ LLVMValueRef num_threads = unpack_param(ctx, ctx->param_merged_wave_info, 8, 8);
+ LLVMValueRef ena =
+ LLVMBuildICmp(ctx->ac.builder, LLVMIntULT,
+ ac_get_thread_id(&ctx->ac), num_threads, "");
+ lp_build_if(&ctx->merged_wrap_if_state, &ctx->gallivm, ena);
}
}
ctx->postponed_kill = lp_build_alloca(&ctx->gallivm, ctx->f32, "");
}
- if (!lp_build_tgsi_llvm(bld_base, sel->tokens)) {
- fprintf(stderr, "Failed to translate shader from TGSI to LLVM\n");
- return false;
+ if (sel->tokens) {
+ if (!lp_build_tgsi_llvm(bld_base, sel->tokens)) {
+ fprintf(stderr, "Failed to translate shader from TGSI to LLVM\n");
+ return false;
+ }
+ } else {
+ if (!si_nir_build_llvm(ctx, sel->nir)) {
+ fprintf(stderr, "Failed to translate shader from NIR to LLVM\n");
+ return false;
+ }
}
si_llvm_build_ret(ctx, ctx->return_value);
{
unsigned num_sgprs, num_vgprs;
struct gallivm_state *gallivm = &ctx->gallivm;
+ struct si_function_info fninfo;
LLVMBuilderRef builder = gallivm->builder;
- LLVMTypeRef params[48]; /* 40 SGPRs (maximum) + some VGPRs */
LLVMTypeRef returns[48];
LLVMValueRef func, ret;
+ si_init_function_info(&fninfo);
+
if (ctx->screen->b.chip_class >= GFX9) {
num_sgprs = 8 + GFX9_GS_NUM_USER_SGPR;
num_vgprs = 5; /* ES inputs are not needed by GS */
}
for (unsigned i = 0; i < num_sgprs; ++i) {
- params[i] = ctx->i32;
+ add_arg(&fninfo, ARG_SGPR, ctx->i32);
returns[i] = ctx->i32;
}
for (unsigned i = 0; i < num_vgprs; ++i) {
- params[num_sgprs + i] = ctx->i32;
+ add_arg(&fninfo, ARG_VGPR, ctx->i32);
returns[num_sgprs + i] = ctx->f32;
}
/* Create the function. */
si_create_function(ctx, "gs_prolog", returns, num_sgprs + num_vgprs,
- params, num_sgprs + num_vgprs, num_sgprs - 1, 0);
+ &fninfo, 0);
func = ctx->main_fn;
/* Set the full EXEC mask for the prolog, because we are only fiddling
/* PS epilog has one arg per color component; gfx9 merged shader
* prologs need to forward 32 user SGPRs.
*/
- LLVMTypeRef param_types[64];
+ struct si_function_info fninfo;
LLVMValueRef initial[64], out[64];
LLVMTypeRef function_type;
- unsigned num_params;
+ unsigned num_first_params;
unsigned num_out, initial_num_out;
MAYBE_UNUSED unsigned num_out_sgpr; /* used in debug checks */
MAYBE_UNUSED unsigned initial_num_out_sgpr; /* used in debug checks */
unsigned num_sgprs, num_vgprs;
- unsigned last_sgpr_param;
unsigned gprs;
struct lp_build_if_state if_state;
+ si_init_function_info(&fninfo);
+
for (unsigned i = 0; i < num_parts; ++i) {
lp_add_function_attr(parts[i], -1, LP_FUNC_ATTR_ALWAYSINLINE);
LLVMSetLinkage(parts[i], LLVMPrivateLinkage);
num_vgprs = 0;
function_type = LLVMGetElementType(LLVMTypeOf(parts[0]));
- num_params = LLVMCountParamTypes(function_type);
+ num_first_params = LLVMCountParamTypes(function_type);
- for (unsigned i = 0; i < num_params; ++i) {
+ for (unsigned i = 0; i < num_first_params; ++i) {
LLVMValueRef param = LLVMGetParam(parts[0], i);
if (ac_is_sgpr_param(param)) {
num_vgprs += llvm_get_type_size(LLVMTypeOf(param)) / 4;
}
}
- assert(num_vgprs + num_sgprs <= ARRAY_SIZE(param_types));
- num_params = 0;
- last_sgpr_param = 0;
gprs = 0;
while (gprs < num_sgprs + num_vgprs) {
- LLVMValueRef param = LLVMGetParam(parts[main_part], num_params);
- unsigned size;
+ LLVMValueRef param = LLVMGetParam(parts[main_part], fninfo.num_params);
+ LLVMTypeRef type = LLVMTypeOf(param);
+ unsigned size = llvm_get_type_size(type) / 4;
- param_types[num_params] = LLVMTypeOf(param);
- if (gprs < num_sgprs)
- last_sgpr_param = num_params;
- size = llvm_get_type_size(param_types[num_params]) / 4;
- num_params++;
+ add_arg(&fninfo, gprs < num_sgprs ? ARG_SGPR : ARG_VGPR, type);
assert(ac_is_sgpr_param(param) == (gprs < num_sgprs));
assert(gprs + size <= num_sgprs + num_vgprs &&
gprs += size;
}
- si_create_function(ctx, "wrapper", NULL, 0, param_types, num_params,
- last_sgpr_param,
+ si_create_function(ctx, "wrapper", NULL, 0, &fninfo,
si_get_max_workgroup_size(ctx->shader));
if (is_merged_shader(ctx->shader))
num_out = 0;
num_out_sgpr = 0;
- for (unsigned i = 0; i < num_params; ++i) {
+ for (unsigned i = 0; i < fninfo.num_params; ++i) {
LLVMValueRef param = LLVMGetParam(ctx->main_fn, i);
LLVMTypeRef param_type = LLVMTypeOf(param);
- LLVMTypeRef out_type = i <= last_sgpr_param ? ctx->i32 : ctx->f32;
+ LLVMTypeRef out_type = i < fninfo.num_sgpr_params ? ctx->i32 : ctx->f32;
unsigned size = llvm_get_type_size(param_type) / 4;
if (size == 1) {
builder, param, LLVMConstInt(ctx->i32, j, 0), "");
}
- if (i <= last_sgpr_param)
+ if (i < fninfo.num_sgpr_params)
num_out_sgpr = num_out;
}
LLVMValueRef ret;
LLVMTypeRef ret_type;
unsigned out_idx = 0;
-
- num_params = LLVMCountParams(parts[part]);
- assert(num_params <= ARRAY_SIZE(param_types));
+ unsigned num_params = LLVMCountParams(parts[part]);
/* Merged shaders are executed conditionally depending
* on the number of enabled threads passed in the input SGPRs. */
- if (is_merged_shader(ctx->shader) &&
- (part == 0 || part == next_shader_first_part)) {
+ if (is_merged_shader(ctx->shader) && part == 0) {
LLVMValueRef ena, count = initial[3];
- /* The thread count for the 2nd shader is at bit-offset 8. */
- if (part == next_shader_first_part) {
- count = LLVMBuildLShr(builder, count,
- LLVMConstInt(ctx->i32, 8, 0), "");
- }
count = LLVMBuildAnd(builder, count,
LLVMConstInt(ctx->i32, 0x7f, 0), "");
ena = LLVMBuildICmp(builder, LLVMIntULT,
ret = LLVMBuildCall(builder, parts[part], in, num_params, "");
if (is_merged_shader(ctx->shader) &&
- (part + 1 == next_shader_first_part ||
- part + 1 == num_parts)) {
+ part + 1 == next_shader_first_part) {
lp_build_endif(&if_state);
- if (part + 1 == next_shader_first_part) {
- /* A barrier is required between 2 merged shaders. */
- si_llvm_emit_barrier(NULL, &ctx->bld_base, NULL);
-
- /* The second half of the merged shader should use
- * the inputs from the toplevel (wrapper) function,
- * not the return value from the last call.
- *
- * That's because the last call was executed condi-
- * tionally, so we can't consume it in the main
- * block.
- */
- memcpy(out, initial, sizeof(initial));
- num_out = initial_num_out;
- num_out_sgpr = initial_num_out_sgpr;
- }
+ /* The second half of the merged shader should use
+ * the inputs from the toplevel (wrapper) function,
+ * not the return value from the last call.
+ *
+ * That's because the last call was executed condi-
+ * tionally, so we can't consume it in the main
+ * block.
+ */
+ memcpy(out, initial, sizeof(initial));
+ num_out = initial_num_out;
+ num_out_sgpr = initial_num_out_sgpr;
continue;
}
* conversion fails. */
if (r600_can_dump_shader(&sscreen->b, sel->info.processor) &&
!(sscreen->b.debug_flags & DBG_NO_TGSI)) {
- tgsi_dump(sel->tokens, 0);
+ if (sel->tokens)
+ tgsi_dump(sel->tokens, 0);
+ else
+ nir_print_shader(sel->nir, stderr);
si_dump_streamout(&sel->so);
}
union si_shader_part_key *key)
{
struct gallivm_state *gallivm = &ctx->gallivm;
- LLVMTypeRef *params, *returns;
+ struct si_function_info fninfo;
+ LLVMTypeRef *returns;
LLVMValueRef ret, func;
- int last_sgpr, num_params, num_returns, i;
+ int num_returns, i;
unsigned first_vs_vgpr = key->vs_prolog.num_input_sgprs +
key->vs_prolog.num_merged_next_stage_vgprs;
unsigned num_input_vgprs = key->vs_prolog.num_merged_next_stage_vgprs + 4;
num_input_vgprs;
unsigned user_sgpr_base = key->vs_prolog.num_merged_next_stage_vgprs ? 8 : 0;
- ctx->param_vertex_id = first_vs_vgpr;
- ctx->param_instance_id = first_vs_vgpr + (key->vs_prolog.as_ls ? 2 : 1);
+ si_init_function_info(&fninfo);
/* 4 preloaded VGPRs + vertex load indices as prolog outputs */
- params = alloca(num_all_input_regs * sizeof(LLVMTypeRef));
returns = alloca((num_all_input_regs + key->vs_prolog.last_input + 1) *
sizeof(LLVMTypeRef));
- num_params = 0;
num_returns = 0;
/* Declare input and output SGPRs. */
- num_params = 0;
for (i = 0; i < key->vs_prolog.num_input_sgprs; i++) {
- params[num_params++] = ctx->i32;
+ add_arg(&fninfo, ARG_SGPR, ctx->i32);
returns[num_returns++] = ctx->i32;
}
- last_sgpr = num_params - 1;
/* Preloaded VGPRs (outputs must be floats) */
for (i = 0; i < num_input_vgprs; i++) {
- params[num_params++] = ctx->i32;
+ add_arg(&fninfo, ARG_VGPR, ctx->i32);
returns[num_returns++] = ctx->f32;
}
+ fninfo.assign[first_vs_vgpr] = &ctx->abi.vertex_id;
+ fninfo.assign[first_vs_vgpr + (key->vs_prolog.as_ls ? 2 : 1)] = &ctx->abi.instance_id;
+
/* Vertex load indices. */
for (i = 0; i <= key->vs_prolog.last_input; i++)
returns[num_returns++] = ctx->f32;
/* Create the function. */
- si_create_function(ctx, "vs_prolog", returns, num_returns, params,
- num_params, last_sgpr, 0);
+ si_create_function(ctx, "vs_prolog", returns, num_returns, &fninfo, 0);
func = ctx->main_fn;
if (key->vs_prolog.num_merged_next_stage_vgprs &&
LLVMValueRef p = LLVMGetParam(func, i);
ret = LLVMBuildInsertValue(gallivm->builder, ret, p, i, "");
}
- for (; i < num_params; i++) {
+ for (; i < fninfo.num_params; i++) {
LLVMValueRef p = LLVMGetParam(func, i);
p = LLVMBuildBitCast(gallivm->builder, p, ctx->f32, "");
ret = LLVMBuildInsertValue(gallivm->builder, ret, p, i, "");
} else {
/* VertexID + BaseVertex */
index = LLVMBuildAdd(gallivm->builder,
- LLVMGetParam(func, ctx->param_vertex_id),
+ ctx->abi.vertex_id,
LLVMGetParam(func, user_sgpr_base +
SI_SGPR_BASE_VERTEX), "");
}
index = LLVMBuildBitCast(gallivm->builder, index, ctx->f32, "");
ret = LLVMBuildInsertValue(gallivm->builder, ret, index,
- num_params++, "");
+ fninfo.num_params + i, "");
}
si_llvm_build_ret(ctx, ret);
{
struct gallivm_state *gallivm = &ctx->gallivm;
struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
- LLVMTypeRef params[32];
+ struct si_function_info fninfo;
LLVMValueRef func;
- int last_sgpr, num_params = 0;
+
+ si_init_function_info(&fninfo);
if (ctx->screen->b.chip_class >= GFX9) {
- params[num_params++] = ctx->i64;
- params[ctx->param_tcs_offchip_offset = num_params++] = ctx->i32;
- params[num_params++] = ctx->i32; /* wave info */
- params[ctx->param_tcs_factor_offset = num_params++] = ctx->i32;
- params[num_params++] = ctx->i32;
- params[num_params++] = ctx->i32;
- params[num_params++] = ctx->i32;
- params[num_params++] = ctx->i64;
- params[num_params++] = ctx->i64;
- params[num_params++] = ctx->i64;
- params[num_params++] = ctx->i64;
- params[num_params++] = ctx->i32;
- params[num_params++] = ctx->i32;
- params[num_params++] = ctx->i32;
- params[num_params++] = ctx->i32;
- params[ctx->param_tcs_offchip_layout = num_params++] = ctx->i32;
- params[num_params++] = ctx->i32;
- params[num_params++] = ctx->i32;
- params[ctx->param_tcs_offchip_addr_base64k = num_params++] = ctx->i32;
- params[ctx->param_tcs_factor_addr_base64k = num_params++] = ctx->i32;
+ add_arg(&fninfo, ARG_SGPR, ctx->i64);
+ ctx->param_tcs_offchip_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ add_arg(&fninfo, ARG_SGPR, ctx->i32); /* wave info */
+ ctx->param_tcs_factor_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ add_arg(&fninfo, ARG_SGPR, ctx->i64);
+ add_arg(&fninfo, ARG_SGPR, ctx->i64);
+ add_arg(&fninfo, ARG_SGPR, ctx->i64);
+ add_arg(&fninfo, ARG_SGPR, ctx->i64);
+ add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ ctx->param_tcs_offchip_addr_base64k = add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ ctx->param_tcs_factor_addr_base64k = add_arg(&fninfo, ARG_SGPR, ctx->i32);
} else {
- params[num_params++] = ctx->i64;
- params[num_params++] = ctx->i64;
- params[num_params++] = ctx->i64;
- params[ctx->param_tcs_offchip_layout = num_params++] = ctx->i32;
- params[num_params++] = ctx->i32;
- params[num_params++] = ctx->i32;
- params[num_params++] = ctx->i32;
- params[ctx->param_tcs_offchip_addr_base64k = num_params++] = ctx->i32;
- params[ctx->param_tcs_factor_addr_base64k = num_params++] = ctx->i32;
- params[ctx->param_tcs_offchip_offset = num_params++] = ctx->i32;
- params[ctx->param_tcs_factor_offset = num_params++] = ctx->i32;
- }
- last_sgpr = num_params - 1;
-
- params[num_params++] = ctx->i32; /* patch index within the wave (REL_PATCH_ID) */
- params[num_params++] = ctx->i32; /* invocation ID within the patch */
- params[num_params++] = ctx->i32; /* LDS offset where tess factors should be loaded from */
+ add_arg(&fninfo, ARG_SGPR, ctx->i64);
+ add_arg(&fninfo, ARG_SGPR, ctx->i64);
+ add_arg(&fninfo, ARG_SGPR, ctx->i64);
+ ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ ctx->param_tcs_offchip_addr_base64k = add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ ctx->param_tcs_factor_addr_base64k = add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ ctx->param_tcs_offchip_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ ctx->param_tcs_factor_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ }
+
+ add_arg(&fninfo, ARG_VGPR, ctx->i32); /* VGPR gap */
+ add_arg(&fninfo, ARG_VGPR, ctx->i32); /* VGPR gap */
+ unsigned tess_factors_idx =
+ add_arg(&fninfo, ARG_VGPR, ctx->i32); /* patch index within the wave (REL_PATCH_ID) */
+ add_arg(&fninfo, ARG_VGPR, ctx->i32); /* invocation ID within the patch */
+ add_arg(&fninfo, ARG_VGPR, ctx->i32); /* LDS offset where tess factors should be loaded from */
/* Create the function. */
- si_create_function(ctx, "tcs_epilog", NULL, 0, params, num_params, last_sgpr,
+ si_create_function(ctx, "tcs_epilog", NULL, 0, &fninfo,
ctx->screen->b.chip_class >= CIK ? 128 : 64);
declare_lds_as_pointer(ctx);
func = ctx->main_fn;
si_write_tess_factors(bld_base,
- LLVMGetParam(func, last_sgpr + 1),
- LLVMGetParam(func, last_sgpr + 2),
- LLVMGetParam(func, last_sgpr + 3));
+ LLVMGetParam(func, tess_factors_idx),
+ LLVMGetParam(func, tess_factors_idx + 1),
+ LLVMGetParam(func, tess_factors_idx + 2));
LLVMBuildRetVoid(gallivm->builder);
}
union si_shader_part_key *key)
{
struct gallivm_state *gallivm = &ctx->gallivm;
- LLVMTypeRef *params;
+ struct si_function_info fninfo;
LLVMValueRef ret, func;
- int last_sgpr, num_params, num_returns, i, num_color_channels;
+ int num_returns, i, num_color_channels;
assert(si_need_ps_prolog(key));
- /* Number of inputs + 8 color elements. */
- params = alloca((key->ps_prolog.num_input_sgprs +
- key->ps_prolog.num_input_vgprs + 8) *
- sizeof(LLVMTypeRef));
+ si_init_function_info(&fninfo);
/* Declare inputs. */
- num_params = 0;
for (i = 0; i < key->ps_prolog.num_input_sgprs; i++)
- params[num_params++] = ctx->i32;
- last_sgpr = num_params - 1;
+ add_arg(&fninfo, ARG_SGPR, ctx->i32);
for (i = 0; i < key->ps_prolog.num_input_vgprs; i++)
- params[num_params++] = ctx->f32;
+ add_arg(&fninfo, ARG_VGPR, ctx->f32);
/* Declare outputs (same as inputs + add colors if needed) */
- num_returns = num_params;
+ num_returns = fninfo.num_params;
num_color_channels = util_bitcount(key->ps_prolog.colors_read);
for (i = 0; i < num_color_channels; i++)
- params[num_returns++] = ctx->f32;
+ fninfo.types[num_returns++] = ctx->f32;
/* Create the function. */
- si_create_function(ctx, "ps_prolog", params, num_returns, params,
- num_params, last_sgpr, 0);
+ si_create_function(ctx, "ps_prolog", fninfo.types, num_returns,
+ &fninfo, 0);
func = ctx->main_fn;
/* Copy inputs to outputs. This should be no-op, as the registers match,
* but it will prevent the compiler from overwriting them unintentionally.
*/
ret = ctx->return_value;
- for (i = 0; i < num_params; i++) {
+ for (i = 0; i < fninfo.num_params; i++) {
LLVMValueRef p = LLVMGetParam(func, i);
ret = LLVMBuildInsertValue(gallivm->builder, ret, p, i, "");
}
}
/* Interpolate colors. */
+ unsigned color_out_idx = 0;
for (i = 0; i < 2; i++) {
unsigned writemask = (key->ps_prolog.colors_read >> (i * 4)) & 0xf;
unsigned face_vgpr = key->ps_prolog.num_input_sgprs +
while (writemask) {
unsigned chan = u_bit_scan(&writemask);
ret = LLVMBuildInsertValue(gallivm->builder, ret, color[chan],
- num_params++, "");
+ fninfo.num_params + color_out_idx++, "");
}
}
{
struct gallivm_state *gallivm = &ctx->gallivm;
struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
- LLVMTypeRef params[16+8*4+3];
+ struct si_function_info fninfo;
LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL;
- int last_sgpr, num_params = 0, i;
+ int i;
struct si_ps_exports exp = {};
+ si_init_function_info(&fninfo);
+
/* Declare input SGPRs. */
- params[ctx->param_rw_buffers = num_params++] = ctx->i64;
- params[ctx->param_const_and_shader_buffers = num_params++] = ctx->i64;
- params[ctx->param_samplers_and_images = num_params++] = ctx->i64;
- assert(num_params == SI_PARAM_ALPHA_REF);
- params[SI_PARAM_ALPHA_REF] = ctx->f32;
- last_sgpr = SI_PARAM_ALPHA_REF;
+ ctx->param_rw_buffers = add_arg(&fninfo, ARG_SGPR, ctx->i64);
+ ctx->param_const_and_shader_buffers = add_arg(&fninfo, ARG_SGPR, ctx->i64);
+ ctx->param_samplers_and_images = add_arg(&fninfo, ARG_SGPR, ctx->i64);
+ add_arg_checked(&fninfo, ARG_SGPR, ctx->f32, SI_PARAM_ALPHA_REF);
/* Declare input VGPRs. */
- num_params = (last_sgpr + 1) +
+ unsigned required_num_params =
+ fninfo.num_sgpr_params +
util_bitcount(key->ps_epilog.colors_written) * 4 +
key->ps_epilog.writes_z +
key->ps_epilog.writes_stencil +
key->ps_epilog.writes_samplemask;
- num_params = MAX2(num_params,
- last_sgpr + 1 + PS_EPILOG_SAMPLEMASK_MIN_LOC + 1);
+ required_num_params = MAX2(required_num_params,
+ fninfo.num_sgpr_params + PS_EPILOG_SAMPLEMASK_MIN_LOC + 1);
- assert(num_params <= ARRAY_SIZE(params));
-
- for (i = last_sgpr + 1; i < num_params; i++)
- params[i] = ctx->f32;
+ while (fninfo.num_params < required_num_params)
+ add_arg(&fninfo, ARG_VGPR, ctx->f32);
/* Create the function. */
- si_create_function(ctx, "ps_epilog", NULL, 0, params, num_params,
- last_sgpr, 0);
+ si_create_function(ctx, "ps_epilog", NULL, 0, &fninfo, 0);
/* Disable elimination of unused inputs. */
si_llvm_add_attribute(ctx->main_fn,
"InitialPSInputAddr", 0xffffff);
/* Process colors. */
- unsigned vgpr = last_sgpr + 1;
+ unsigned vgpr = fninfo.num_sgpr_params;
unsigned colors_written = key->ps_epilog.colors_written;
int last_color_export = -1;
color[i] = LLVMGetParam(ctx->main_fn, vgpr++);
si_export_mrt_color(bld_base, color, mrt,
- num_params - 1,
+ fninfo.num_params - 1,
mrt == last_color_export, &exp);
}
if (r)
return r;
} else {
- /* The shader consists of 2-3 parts:
+ /* The shader consists of several parts:
*
* - the middle part is the user shader, it has 1 variant only
* and it was compiled during the creation of the shader
* - the epilog part is inserted at the end
*
* The prolog and epilog have many (but simple) variants.
+ *
+ * Starting with gfx9, geometry and tessellation control
+ * shaders also contain the prolog and user shader parts of
+ * the previous shader stage.
*/
+ if (!mainp)
+ return -1;
+
/* Copy the compiled TGSI shader data over. */
shader->is_binary_shared = true;
shader->binary = mainp->binary;