LLVMValueRef esgs_ring;
LLVMValueRef gsvs_ring[4];
LLVMValueRef gs_next_vertex[4];
+ LLVMValueRef return_value;
LLVMTypeRef voidt;
LLVMTypeRef i1;
static LLVMValueRef get_instance_index_for_fetch(
struct radeon_llvm_context *radeon_bld,
- unsigned divisor)
+ unsigned param_start_instance, unsigned divisor)
{
struct si_shader_context *ctx =
si_shader_context(&radeon_bld->soa.bld_base);
result = LLVMBuildUDiv(gallivm->builder, result,
lp_build_const_int32(gallivm, divisor), "");
- return LLVMBuildAdd(gallivm->builder, result, LLVMGetParam(
- radeon_bld->main_fn, SI_PARAM_START_INSTANCE), "");
+ return LLVMBuildAdd(gallivm->builder, result,
+ LLVMGetParam(radeon_bld->main_fn, param_start_instance), "");
}
static void declare_input_vs(
struct gallivm_state *gallivm = base->gallivm;
struct si_shader_context *ctx =
si_shader_context(&radeon_bld->soa.bld_base);
- unsigned divisor = ctx->shader->key.vs.instance_divisors[input_index];
+ unsigned divisor =
+ ctx->shader->key.vs.prolog.instance_divisors[input_index];
unsigned chan;
if (divisor) {
/* Build index from instance ID, start instance and divisor */
ctx->shader->uses_instanceid = true;
- buffer_index = get_instance_index_for_fetch(&ctx->radeon_bld, divisor);
+ buffer_index = get_instance_index_for_fetch(&ctx->radeon_bld,
+ SI_PARAM_START_INSTANCE,
+ divisor);
} else {
/* Load the buffer index for vertices. */
LLVMValueRef vertex_id = LLVMGetParam(ctx->radeon_bld.main_fn,
static unsigned select_interp_param(struct si_shader_context *ctx,
unsigned param)
{
- if (!ctx->shader->key.ps.force_persample_interp)
+ if (!ctx->shader->key.ps.prolog.force_persample_interp)
return param;
/* If the shader doesn't use center/centroid, just return the parameter.
intr_name = interp_param ? "llvm.SI.fs.interp" : "llvm.SI.fs.constant";
if (semantic_name == TGSI_SEMANTIC_COLOR &&
- ctx->shader->key.ps.color_two_side) {
+ ctx->shader->key.ps.prolog.color_two_side) {
LLVMValueRef args[4];
LLVMValueRef is_face_positive;
LLVMValueRef back_attr_number;
if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
const union si_shader_key *key = &ctx->shader->key;
- unsigned col_formats = key->ps.spi_shader_col_format;
+ unsigned col_formats = key->ps.epilog.spi_shader_col_format;
int cbuf = target - V_008DFC_SQ_EXP_MRT;
assert(cbuf >= 0 && cbuf < 8);
spi_shader_col_format = (col_formats >> (cbuf * 4)) & 0xf;
- is_int8 = (key->ps.color_is_int8 >> cbuf) & 0x1;
+ is_int8 = (key->ps.epilog.color_is_int8 >> cbuf) & 0x1;
}
args[4] = uint->zero; /* COMPR flag */
struct si_shader_context *ctx = si_shader_context(bld_base);
struct gallivm_state *gallivm = bld_base->base.gallivm;
- if (ctx->shader->key.ps.alpha_func != PIPE_FUNC_NEVER) {
+ if (ctx->shader->key.ps.epilog.alpha_func != PIPE_FUNC_NEVER) {
LLVMValueRef alpha_ref = LLVMGetParam(ctx->radeon_bld.main_fn,
SI_PARAM_ALPHA_REF);
LLVMValueRef alpha_pass =
lp_build_cmp(&bld_base->base,
- ctx->shader->key.ps.alpha_func,
+ ctx->shader->key.ps.epilog.alpha_func,
alpha, alpha_ref);
LLVMValueRef arg =
lp_build_select(&bld_base->base,
}
static LLVMValueRef si_scale_alpha_by_sample_mask(struct lp_build_tgsi_context *bld_base,
- LLVMValueRef alpha)
+ LLVMValueRef alpha,
+ unsigned samplemask_param)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
struct gallivm_state *gallivm = bld_base->base.gallivm;
/* alpha = alpha * popcount(coverage) / SI_NUM_SMOOTH_AA_SAMPLES */
coverage = LLVMGetParam(ctx->radeon_bld.main_fn,
- SI_PARAM_SAMPLE_COVERAGE);
+ samplemask_param);
coverage = bitcast(bld_base, TGSI_TYPE_SIGNED, coverage);
coverage = lp_build_intrinsic(gallivm->builder, "llvm.ctpop.i32",
invocation_id, bld_base->uint_bld.zero, ""));
/* Determine the layout of one tess factor element in the buffer. */
- switch (shader->key.tcs.prim_mode) {
+ switch (shader->key.tcs.epilog.prim_mode) {
case PIPE_PRIM_LINES:
stride = 2; /* 2 dwords, 1 vec2 store */
outer_comps = 2;
static void si_export_mrt_color(struct lp_build_tgsi_context *bld_base,
LLVMValueRef *color, unsigned index,
+ unsigned samplemask_param,
bool is_last)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
int i;
/* Clamp color */
- if (ctx->shader->key.ps.clamp_color)
+ if (ctx->shader->key.ps.epilog.clamp_color)
for (i = 0; i < 4; i++)
color[i] = radeon_llvm_saturate(bld_base, color[i]);
/* Alpha to one */
- if (ctx->shader->key.ps.alpha_to_one)
+ if (ctx->shader->key.ps.epilog.alpha_to_one)
color[3] = base->one;
/* Alpha test */
if (index == 0 &&
- ctx->shader->key.ps.alpha_func != PIPE_FUNC_ALWAYS)
+ ctx->shader->key.ps.epilog.alpha_func != PIPE_FUNC_ALWAYS)
si_alpha_test(bld_base, color[3]);
/* Line & polygon smoothing */
- if (ctx->shader->key.ps.poly_line_smoothing)
- color[3] = si_scale_alpha_by_sample_mask(bld_base, color[3]);
+ if (ctx->shader->key.ps.epilog.poly_line_smoothing)
+ color[3] = si_scale_alpha_by_sample_mask(bld_base, color[3],
+ samplemask_param);
/* If last_cbuf > 0, FS_COLOR0_WRITES_ALL_CBUFS is true. */
- if (ctx->shader->key.ps.last_cbuf > 0) {
+ if (ctx->shader->key.ps.epilog.last_cbuf > 0) {
LLVMValueRef args[8][9];
int c, last = -1;
/* Get the export arguments, also find out what the last one is. */
- for (c = 0; c <= ctx->shader->key.ps.last_cbuf; c++) {
+ for (c = 0; c <= ctx->shader->key.ps.epilog.last_cbuf; c++) {
si_llvm_init_export_args(bld_base, color,
V_008DFC_SQ_EXP_MRT + c, args[c]);
if (args[c][0] != bld_base->uint_bld.zero)
}
/* Emit all exports. */
- for (c = 0; c <= ctx->shader->key.ps.last_cbuf; c++) {
+ for (c = 0; c <= ctx->shader->key.ps.epilog.last_cbuf; c++) {
if (is_last && last == c) {
args[c][1] = bld_base->uint_bld.one; /* whether the EXEC mask is valid */
args[c][2] = bld_base->uint_bld.one; /* DONE bit */
* Otherwise, find the last color export.
*/
if (!info->writes_z && !info->writes_stencil && !info->writes_samplemask) {
- unsigned spi_format = shader->key.ps.spi_shader_col_format;
+ unsigned spi_format = shader->key.ps.epilog.spi_shader_col_format;
/* Don't export NULL and return if alpha-test is enabled. */
- if (shader->key.ps.alpha_func != PIPE_FUNC_ALWAYS &&
- shader->key.ps.alpha_func != PIPE_FUNC_NEVER &&
+ if (shader->key.ps.epilog.alpha_func != PIPE_FUNC_ALWAYS &&
+ shader->key.ps.epilog.alpha_func != PIPE_FUNC_NEVER &&
(spi_format & 0xf) == 0)
spi_format |= V_028714_SPI_SHADER_32_AR;
continue;
/* If last_cbuf > 0, FS_COLOR0_WRITES_ALL_CBUFS is true. */
- if (shader->key.ps.last_cbuf > 0) {
+ if (shader->key.ps.epilog.last_cbuf > 0) {
/* Just set this if any of the colorbuffers are enabled. */
if (spi_format &
- ((1llu << (4 * (shader->key.ps.last_cbuf + 1))) - 1))
+ ((1llu << (4 * (shader->key.ps.epilog.last_cbuf + 1))) - 1))
last_color_export = i;
continue;
}
ctx->radeon_bld.soa.outputs[i][j], "");
si_export_mrt_color(bld_base, color, semantic_index,
+ SI_PARAM_SAMPLE_COVERAGE,
last_color_export == i);
break;
default:
.emit = build_interp_intrinsic,
};
+static void si_create_function(struct si_shader_context *ctx,
+ LLVMTypeRef *returns, unsigned num_returns,
+ LLVMTypeRef *params, unsigned num_params,
+ int last_array_pointer, int last_sgpr)
+{
+ int i;
+ /* Build the shader function from the given return-type and parameter-type
+  * arrays via radeon_llvm_create_func(), tag it with ctx->type via
+  * radeon_llvm_shader_type(), and seed ctx->return_value with an undef
+  * value of radeon_bld.return_type.
+  *
+  * radeon_llvm_create_func() is expected to fill in radeon_bld.main_fn and
+  * radeon_bld.return_type, since both are read right after the call
+  * (NOTE(review): confirm against its definition).
+  *
+  * Parameters 0..last_sgpr then get exactly one attribute each:
+  *   index <= last_array_pointer -> ByVal
+  *   otherwise                   -> InReg
+  * Parameters past last_sgpr are left without attributes. */
+ radeon_llvm_create_func(&ctx->radeon_bld, returns, num_returns,
+ params, num_params);
+ radeon_llvm_shader_type(ctx->radeon_bld.main_fn, ctx->type);
+ ctx->return_value = LLVMGetUndef(ctx->radeon_bld.return_type);
+
+ for (i = 0; i <= last_sgpr; ++i) {
+ LLVMValueRef P = LLVMGetParam(ctx->radeon_bld.main_fn, i);
+
+ /* We tell LLVM that array inputs are passed by value to allow the
+ * Sinking pass to move loads. Inputs are constant so this is fine. */
+ if (i <= last_array_pointer)
+ LLVMAddAttribute(P, LLVMByValAttribute);
+ else
+ LLVMAddAttribute(P, LLVMInRegAttribute);
+ }
+}
+
static void create_meta_data(struct si_shader_context *ctx)
{
struct gallivm_state *gallivm = ctx->radeon_bld.soa.bld_base.base.gallivm;
}
}
+static unsigned llvm_get_type_size(LLVMTypeRef type)
+{
+ LLVMTypeKind kind = LLVMGetTypeKind(type);
+ /* Return the size of "type" in bytes for the kinds handled below.
+  * Vectors recurse on their element type; any other kind hits assert(0)
+  * and, if asserts are compiled out, yields 0. */
+ switch (kind) {
+ case LLVMIntegerTypeKind:
+ return LLVMGetIntTypeWidth(type) / 8; /* bits -> bytes, truncating: widths below 8 bits give 0 */
+ case LLVMFloatTypeKind:
+ return 4;
+ case LLVMPointerTypeKind:
+ return 8; /* hard-coded pointer size; NOTE(review): assumes 64-bit pointers for every address space passed in - confirm */
+ case LLVMVectorTypeKind:
+ return LLVMGetVectorSize(type) *
+ llvm_get_type_size(LLVMGetElementType(type)); /* element count * element size */
+ default:
+ assert(0);
+ return 0;
+ }
+}
+
+static void declare_tess_lds(struct si_shader_context *ctx)
+{
+ struct gallivm_state *gallivm = &ctx->radeon_bld.gallivm;
+ LLVMTypeRef i32 = ctx->radeon_bld.soa.bld_base.uint_bld.elem_type;
+ /* Declare the global "tess_lds" in LOCAL_ADDR_SPACE as an array of
+  * lds_dwords elements of type i32 and store its handle in ctx->lds.
+  * i32 is taken from uint_bld.elem_type (presumably the 32-bit integer
+  * type - verify against the uint_bld setup). */
+ /* This is the upper bound, maximum is 32 inputs times 32 vertices */
+ unsigned vertex_data_dw_size = 32*32*4; /* 4096 dwords; the *4 is presumably 4 dwords per input - TODO confirm */
+ unsigned patch_data_dw_size = 32*4; /* 128 dwords */
+ /* The formula is: TCS inputs + TCS outputs + TCS patch outputs. */
+ unsigned patch_dw_size = vertex_data_dw_size*2 + patch_data_dw_size; /* 2*4096 + 128 = 8320 dwords */
+ unsigned lds_dwords = patch_dw_size;
+
+ /* The actual size is computed outside of the shader to reduce
+ * the number of shader variants. The array length declared here is
+ * therefore only the fixed upper bound computed above. */
+ ctx->lds =
+ LLVMAddGlobalInAddressSpace(gallivm->module,
+ LLVMArrayType(i32, lds_dwords),
+ "tess_lds",
+ LOCAL_ADDR_SPACE);
+}
+
static void create_function(struct si_shader_context *ctx)
{
struct lp_build_tgsi_context *bld_base = &ctx->radeon_bld.soa.bld_base;
}
assert(num_params <= Elements(params));
- radeon_llvm_create_func(&ctx->radeon_bld, params, num_params);
- radeon_llvm_shader_type(ctx->radeon_bld.main_fn, ctx->type);
- for (i = 0; i <= last_sgpr; ++i) {
- LLVMValueRef P = LLVMGetParam(ctx->radeon_bld.main_fn, i);
+ si_create_function(ctx, NULL, 0, params,
+ num_params, last_array_pointer, last_sgpr);
- /* We tell llvm that array inputs are passed by value to allow Sinking pass
- * to move load. Inputs are constant so this is fine. */
- if (i <= last_array_pointer)
- LLVMAddAttribute(P, LLVMByValAttribute);
- else
- LLVMAddAttribute(P, LLVMInRegAttribute);
- }
+ shader->num_input_sgprs = 0;
+ shader->num_input_vgprs = 0;
+
+ for (i = 0; i <= last_sgpr; ++i)
+ shader->num_input_sgprs += llvm_get_type_size(params[i]) / 4;
+
+ /* Unused fragment shader inputs are eliminated by the compiler,
+ * so we don't know yet how many there will be.
+ */
+ if (ctx->type != TGSI_PROCESSOR_FRAGMENT)
+ for (; i < num_params; ++i)
+ shader->num_input_vgprs += llvm_get_type_size(params[i]) / 4;
if (bld_base->info &&
(bld_base->info->opcode_count[TGSI_OPCODE_DDX] > 0 ||
if ((ctx->type == TGSI_PROCESSOR_VERTEX && shader->key.vs.as_ls) ||
ctx->type == TGSI_PROCESSOR_TESS_CTRL ||
- ctx->type == TGSI_PROCESSOR_TESS_EVAL) {
- /* This is the upper bound, maximum is 32 inputs times 32 vertices */
- unsigned vertex_data_dw_size = 32*32*4;
- unsigned patch_data_dw_size = 32*4;
- /* The formula is: TCS inputs + TCS outputs + TCS patch outputs. */
- unsigned patch_dw_size = vertex_data_dw_size*2 + patch_data_dw_size;
- unsigned lds_dwords = patch_dw_size;
-
- /* The actual size is computed outside of the shader to reduce
- * the number of shader variants. */
- ctx->lds =
- LLVMAddGlobalInAddressSpace(gallivm->module,
- LLVMArrayType(ctx->i32, lds_dwords),
- "tess_lds",
- LOCAL_ADDR_SPACE);
- }
+ ctx->type == TGSI_PROCESSOR_TESS_EVAL)
+ declare_tess_lds(ctx);
}
static void preload_constants(struct si_shader_context *ctx)
si_llvm_export_vs(bld_base, outputs, gsinfo->num_outputs);
- LLVMBuildRetVoid(bld_base->base.gallivm->builder);
+ LLVMBuildRet(gallivm->builder, ctx->return_value);
/* Dump LLVM IR before any optimization passes */
if (sscreen->b.debug_flags & DBG_PREOPT_IR &&
switch (shader) {
case PIPE_SHADER_VERTEX:
fprintf(f, " instance_divisors = {");
- for (i = 0; i < Elements(key->vs.instance_divisors); i++)
+ for (i = 0; i < Elements(key->vs.prolog.instance_divisors); i++)
fprintf(f, !i ? "%u" : ", %u",
- key->vs.instance_divisors[i]);
+ key->vs.prolog.instance_divisors[i]);
fprintf(f, "}\n");
fprintf(f, " as_es = %u\n", key->vs.as_es);
fprintf(f, " as_ls = %u\n", key->vs.as_ls);
- fprintf(f, " export_prim_id = %u\n", key->vs.export_prim_id);
+ fprintf(f, " export_prim_id = %u\n", key->vs.epilog.export_prim_id);
break;
case PIPE_SHADER_TESS_CTRL:
- fprintf(f, " prim_mode = %u\n", key->tcs.prim_mode);
+ fprintf(f, " prim_mode = %u\n", key->tcs.epilog.prim_mode);
break;
case PIPE_SHADER_TESS_EVAL:
fprintf(f, " as_es = %u\n", key->tes.as_es);
- fprintf(f, " export_prim_id = %u\n", key->tes.export_prim_id);
+ fprintf(f, " export_prim_id = %u\n", key->tes.epilog.export_prim_id);
break;
case PIPE_SHADER_GEOMETRY:
break;
case PIPE_SHADER_FRAGMENT:
- fprintf(f, " spi_shader_col_format = 0x%x\n", key->ps.spi_shader_col_format);
- fprintf(f, " last_cbuf = %u\n", key->ps.last_cbuf);
- fprintf(f, " color_two_side = %u\n", key->ps.color_two_side);
- fprintf(f, " alpha_func = %u\n", key->ps.alpha_func);
- fprintf(f, " alpha_to_one = %u\n", key->ps.alpha_to_one);
- fprintf(f, " poly_stipple = %u\n", key->ps.poly_stipple);
- fprintf(f, " clamp_color = %u\n", key->ps.clamp_color);
+ fprintf(f, " prolog.color_two_side = %u\n", key->ps.prolog.color_two_side);
+ fprintf(f, " prolog.poly_stipple = %u\n", key->ps.prolog.poly_stipple);
+ fprintf(f, " prolog.force_persample_interp = %u\n", key->ps.prolog.force_persample_interp);
+ fprintf(f, " epilog.spi_shader_col_format = 0x%x\n", key->ps.epilog.spi_shader_col_format);
+ fprintf(f, " epilog.color_is_int8 = 0x%X\n", key->ps.epilog.color_is_int8);
+ fprintf(f, " epilog.last_cbuf = %u\n", key->ps.epilog.last_cbuf);
+ fprintf(f, " epilog.alpha_func = %u\n", key->ps.epilog.alpha_func);
+ fprintf(f, " epilog.alpha_to_one = %u\n", key->ps.epilog.alpha_to_one);
+ fprintf(f, " epilog.poly_line_smoothing = %u\n", key->ps.epilog.poly_line_smoothing);
+ fprintf(f, " epilog.clamp_color = %u\n", key->ps.epilog.clamp_color);
break;
default:
struct lp_build_tgsi_context *bld_base;
memset(ctx, 0, sizeof(*ctx));
- radeon_llvm_context_init(&ctx->radeon_bld);
+ radeon_llvm_context_init(&ctx->radeon_bld, "amdgcn--");
ctx->tm = tm;
ctx->screen = sscreen;
if (shader && shader->selector)
ctx->i1 = LLVMInt1TypeInContext(ctx->radeon_bld.gallivm.context);
ctx->i8 = LLVMInt8TypeInContext(ctx->radeon_bld.gallivm.context);
ctx->i32 = LLVMInt32TypeInContext(ctx->radeon_bld.gallivm.context);
- ctx->i128 = LLVMInt128TypeInContext(ctx->radeon_bld.gallivm.context);
+ ctx->i128 = LLVMIntTypeInContext(ctx->radeon_bld.gallivm.context, 128);
ctx->f32 = LLVMFloatTypeInContext(ctx->radeon_bld.gallivm.context);
ctx->v16i8 = LLVMVectorType(ctx->i8, 16);
ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
bld_base->op_actions[TGSI_OPCODE_ENDPRIM].emit = si_llvm_emit_primitive;
bld_base->op_actions[TGSI_OPCODE_BARRIER].emit = si_llvm_emit_barrier;
- if (HAVE_LLVM >= 0x0306) {
- bld_base->op_actions[TGSI_OPCODE_MAX].emit = build_tgsi_intrinsic_nomem;
- bld_base->op_actions[TGSI_OPCODE_MAX].intr_name = "llvm.maxnum.f32";
- bld_base->op_actions[TGSI_OPCODE_MIN].emit = build_tgsi_intrinsic_nomem;
- bld_base->op_actions[TGSI_OPCODE_MIN].intr_name = "llvm.minnum.f32";
- }
+ bld_base->op_actions[TGSI_OPCODE_MAX].emit = build_tgsi_intrinsic_nomem;
+ bld_base->op_actions[TGSI_OPCODE_MAX].intr_name = "llvm.maxnum.f32";
+ bld_base->op_actions[TGSI_OPCODE_MIN].emit = build_tgsi_intrinsic_nomem;
+ bld_base->op_actions[TGSI_OPCODE_MIN].intr_name = "llvm.minnum.f32";
}
int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
LLVMModuleRef mod;
int r = 0;
bool poly_stipple = sel->type == PIPE_SHADER_FRAGMENT &&
- shader->key.ps.poly_stipple;
+ shader->key.ps.prolog.poly_stipple;
if (poly_stipple) {
tokens = util_pstipple_create_fragment_shader(tokens, NULL,
goto out;
}
- LLVMBuildRetVoid(bld_base->base.gallivm->builder);
+ LLVMBuildRet(bld_base->base.gallivm->builder, ctx.return_value);
mod = bld_base->base.gallivm->module;
/* Dump LLVM IR before any optimization passes */
radeon_llvm_dispose(&ctx.radeon_bld);
+ /* Calculate the number of fragment input VGPRs. */
+ if (ctx.type == TGSI_PROCESSOR_FRAGMENT) {
+ shader->num_input_vgprs = 0;
+ shader->face_vgpr_index = -1;
+
+ if (G_0286CC_PERSP_SAMPLE_ENA(shader->config.spi_ps_input_addr))
+ shader->num_input_vgprs += 2;
+ if (G_0286CC_PERSP_CENTER_ENA(shader->config.spi_ps_input_addr))
+ shader->num_input_vgprs += 2;
+ if (G_0286CC_PERSP_CENTROID_ENA(shader->config.spi_ps_input_addr))
+ shader->num_input_vgprs += 2;
+ if (G_0286CC_PERSP_PULL_MODEL_ENA(shader->config.spi_ps_input_addr))
+ shader->num_input_vgprs += 3;
+ if (G_0286CC_LINEAR_SAMPLE_ENA(shader->config.spi_ps_input_addr))
+ shader->num_input_vgprs += 2;
+ if (G_0286CC_LINEAR_CENTER_ENA(shader->config.spi_ps_input_addr))
+ shader->num_input_vgprs += 2;
+ if (G_0286CC_LINEAR_CENTROID_ENA(shader->config.spi_ps_input_addr))
+ shader->num_input_vgprs += 2;
+ if (G_0286CC_LINE_STIPPLE_TEX_ENA(shader->config.spi_ps_input_addr))
+ shader->num_input_vgprs += 1;
+ if (G_0286CC_POS_X_FLOAT_ENA(shader->config.spi_ps_input_addr))
+ shader->num_input_vgprs += 1;
+ if (G_0286CC_POS_Y_FLOAT_ENA(shader->config.spi_ps_input_addr))
+ shader->num_input_vgprs += 1;
+ if (G_0286CC_POS_Z_FLOAT_ENA(shader->config.spi_ps_input_addr))
+ shader->num_input_vgprs += 1;
+ if (G_0286CC_POS_W_FLOAT_ENA(shader->config.spi_ps_input_addr))
+ shader->num_input_vgprs += 1;
+ if (G_0286CC_FRONT_FACE_ENA(shader->config.spi_ps_input_addr)) {
+ shader->face_vgpr_index = shader->num_input_vgprs;
+ shader->num_input_vgprs += 1;
+ }
+ if (G_0286CC_ANCILLARY_ENA(shader->config.spi_ps_input_addr))
+ shader->num_input_vgprs += 1;
+ if (G_0286CC_SAMPLE_COVERAGE_ENA(shader->config.spi_ps_input_addr))
+ shader->num_input_vgprs += 1;
+ if (G_0286CC_POS_FIXED_PT_ENA(shader->config.spi_ps_input_addr))
+ shader->num_input_vgprs += 1;
+ }
+
if (ctx.type == TGSI_PROCESSOR_GEOMETRY) {
shader->gs_copy_shader = CALLOC_STRUCT(si_shader);
shader->gs_copy_shader->selector = shader->selector;