* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
-#include "gallivm/lp_bld_const.h"
-#include "gallivm/lp_bld_intr.h"
#include "util/u_memory.h"
#include "util/u_string.h"
#include "tgsi/tgsi_build.h"
/* Forward declaration of si_init_shader_ctx(); the definition appears further
 * down in this file. The '-'/'+' pair below changes the type of the
 * `compiler` parameter from struct si_compiler * to struct ac_llvm_compiler *.
 */
static void si_init_shader_ctx(struct si_shader_context *ctx,
struct si_screen *sscreen,
- struct si_compiler *compiler);
+ struct ac_llvm_compiler *compiler);
static void si_llvm_emit_barrier(const struct lp_build_tgsi_action *action,
struct lp_build_tgsi_context *bld_base,
LLVMGetParam(ctx->main_fn, SI_PARAM_POS_X_FLOAT),
LLVMGetParam(ctx->main_fn, SI_PARAM_POS_Y_FLOAT),
LLVMGetParam(ctx->main_fn, SI_PARAM_POS_Z_FLOAT),
- lp_build_emit_llvm_unary(&ctx->bld_base, TGSI_OPCODE_RCP,
- LLVMGetParam(ctx->main_fn,
- SI_PARAM_POS_W_FLOAT)),
+ ac_build_fdiv(&ctx->ac, ctx->ac.f32_1,
+ LLVMGetParam(ctx->main_fn, SI_PARAM_POS_W_FLOAT)),
};
value = ac_build_gather_values(&ctx->ac, pos, 4);
break;
LLVMConstReal(ctx->f32, 0),
LLVMConstReal(ctx->f32, 0)
};
- pos[0] = lp_build_emit_llvm_unary(&ctx->bld_base,
- TGSI_OPCODE_FRC, pos[0]);
- pos[1] = lp_build_emit_llvm_unary(&ctx->bld_base,
- TGSI_OPCODE_FRC, pos[1]);
+ pos[0] = ac_build_fract(&ctx->ac, pos[0], 32);
+ pos[1] = ac_build_fract(&ctx->ac, pos[1], 32);
value = ac_build_gather_values(&ctx->ac, pos, 4);
break;
}
break;
case TGSI_SEMANTIC_HELPER_INVOCATION:
- value = lp_build_intrinsic(ctx->ac.builder,
+ value = ac_build_intrinsic(&ctx->ac,
"llvm.amdgcn.ps.live",
ctx->i1, NULL, 0,
- LP_FUNC_ATTR_READNONE);
+ AC_FUNC_ATTR_READNONE);
value = LLVMBuildNot(ctx->ac.builder, value, "");
value = LLVMBuildSExt(ctx->ac.builder, value, ctx->i32, "");
break;
void si_declare_compute_memory(struct si_shader_context *ctx)
{
struct si_shader_selector *sel = ctx->shader->selector;
+ unsigned lds_size = sel->info.properties[TGSI_PROPERTY_CS_LOCAL_SIZE];
LLVMTypeRef i8p = LLVMPointerType(ctx->i8, AC_LOCAL_ADDR_SPACE);
LLVMValueRef var;
assert(!ctx->ac.lds);
var = LLVMAddGlobalInAddressSpace(ctx->ac.module,
- LLVMArrayType(ctx->i8, sel->local_size),
+ LLVMArrayType(ctx->i8, lds_size),
"compute_lds",
AC_LOCAL_ADDR_SPACE);
LLVMSetAlignment(var, 4);
samplemask_param);
coverage = ac_to_integer(&ctx->ac, coverage);
- coverage = lp_build_intrinsic(ctx->ac.builder, "llvm.ctpop.i32",
+ coverage = ac_build_intrinsic(&ctx->ac, "llvm.ctpop.i32",
ctx->i32,
- &coverage, 1, LP_FUNC_ATTR_READNONE);
+ &coverage, 1, AC_FUNC_ATTR_READNONE);
coverage = LLVMBuildUIToFP(ctx->ac.builder, coverage,
ctx->f32, "");
8 + SI_SGPR_VS_STATE_BITS);
#if !HAVE_32BIT_POINTERS
- ret = si_insert_input_ptr(ctx, ret, ctx->param_vs_state_bits + 1,
+ ret = si_insert_input_ptr(ctx, ret, ctx->param_vs_state_bits + 4,
8 + GFX9_SGPR_2ND_SAMPLERS_AND_IMAGES);
#endif
8 + SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES);
#if !HAVE_32BIT_POINTERS
- ret = si_insert_input_ptr(ctx, ret, ctx->param_vs_state_bits + 1,
+ ret = si_insert_input_ptr(ctx, ret, ctx->param_vs_state_bits + 4,
8 + GFX9_SGPR_2ND_SAMPLERS_AND_IMAGES);
#endif
* an IF statement is added that clamps all colors if the constant
* is true.
*/
- if (ctx->type == PIPE_SHADER_VERTEX) {
- struct lp_build_if_state if_ctx;
- LLVMValueRef cond = NULL;
- LLVMValueRef addr, val;
-
- for (i = 0; i < info->num_outputs; i++) {
- if (info->output_semantic_name[i] != TGSI_SEMANTIC_COLOR &&
- info->output_semantic_name[i] != TGSI_SEMANTIC_BCOLOR)
- continue;
+ struct lp_build_if_state if_ctx;
+ LLVMValueRef cond = NULL;
+ LLVMValueRef addr, val;
- /* We've found a color. */
- if (!cond) {
- /* The state is in the first bit of the user SGPR. */
- cond = LLVMGetParam(ctx->main_fn,
- ctx->param_vs_state_bits);
- cond = LLVMBuildTrunc(ctx->ac.builder, cond,
- ctx->i1, "");
- lp_build_if(&if_ctx, &ctx->gallivm, cond);
- }
+ for (i = 0; i < info->num_outputs; i++) {
+ if (info->output_semantic_name[i] != TGSI_SEMANTIC_COLOR &&
+ info->output_semantic_name[i] != TGSI_SEMANTIC_BCOLOR)
+ continue;
- for (j = 0; j < 4; j++) {
- addr = addrs[4 * i + j];
- val = LLVMBuildLoad(ctx->ac.builder, addr, "");
- val = ac_build_clamp(&ctx->ac, val);
- LLVMBuildStore(ctx->ac.builder, val, addr);
- }
+ /* We've found a color. */
+ if (!cond) {
+ /* The state is in the first bit of the user SGPR. */
+ cond = LLVMGetParam(ctx->main_fn,
+ ctx->param_vs_state_bits);
+ cond = LLVMBuildTrunc(ctx->ac.builder, cond,
+ ctx->i1, "");
+ lp_build_if(&if_ctx, &ctx->gallivm, cond);
}
- if (cond)
- lp_build_endif(&if_ctx);
+ for (j = 0; j < 4; j++) {
+ addr = addrs[4 * i + j];
+ val = LLVMBuildLoad(ctx->ac.builder, addr, "");
+ val = ac_build_clamp(&ctx->ac, val);
+ LLVMBuildStore(ctx->ac.builder, val, addr);
+ }
}
+ if (cond)
+ lp_build_endif(&if_ctx);
+
for (i = 0; i < info->num_outputs; i++) {
outputs[i].semantic_name = info->output_semantic_name[i];
outputs[i].semantic_index = info->output_semantic_index[i];
for (i = 0; i < 2; i++) {
a = LLVMBuildExtractElement(ctx->ac.builder, interp_ij,
LLVMConstInt(ctx->i32, i, 0), "");
- result[i] = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_DDX, a);
- result[2+i] = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_DDY, a);
+ result[i] = ac_build_ddxy(&ctx->ac, AC_TID_MASK_TOP_LEFT, 1,
+ ac_to_integer(&ctx->ac, a)); /* DDX */
+ result[2+i] = ac_build_ddxy(&ctx->ac, AC_TID_MASK_TOP_LEFT, 2,
+ ac_to_integer(&ctx->ac, a)); /* DDY */
}
return ac_build_gather_values(&ctx->ac, result, 4);
return;
}
- lp_build_intrinsic(ctx->ac.builder,
+ ac_build_intrinsic(&ctx->ac,
"llvm.amdgcn.s.barrier",
- ctx->voidt, NULL, 0, LP_FUNC_ATTR_CONVERGENT);
+ ctx->voidt, NULL, 0, AC_FUNC_ATTR_CONVERGENT);
}
static const struct lp_build_tgsi_action interp_action = {
* allows the optimization passes to move loads and reduces
* SGPR spilling significantly.
*/
- lp_add_function_attr(ctx->main_fn, i + 1, LP_FUNC_ATTR_INREG);
+ ac_add_function_attr(ctx->ac.context, ctx->main_fn, i + 1,
+ AC_FUNC_ATTR_INREG);
if (LLVMGetTypeKind(LLVMTypeOf(P)) == LLVMPointerTypeKind) {
- lp_add_function_attr(ctx->main_fn, i + 1, LP_FUNC_ATTR_NOALIAS);
+ ac_add_function_attr(ctx->ac.context, ctx->main_fn, i + 1,
+ AC_FUNC_ATTR_NOALIAS);
ac_add_attr_dereferenceable(P, UINT64_MAX);
}
}
/* Declare the vertex-shader-specific input SGPRs on `fninfo`.
 *
 * Four ctx->i32 arguments are added with ARG_SGPR: the VS state bits, whose
 * add_arg() result is kept in ctx->param_vs_state_bits (used elsewhere in
 * this file as a parameter index for LLVMGetParam), plus base_vertex,
 * start_instance and draw_id, which are passed to add_arg_assign() together
 * with the matching ctx->abi field.
 * NOTE(review): add_arg_assign() presumably stores the resulting parameter
 * into that field -- confirm against its definition.
 *
 * The '+'/'-' lines below move the VS state bits argument from the last
 * position to the first, so it precedes the three draw parameters.
 */
static void declare_vs_specific_input_sgprs(struct si_shader_context *ctx,
struct si_function_info *fninfo)
{
+ ctx->param_vs_state_bits = add_arg(fninfo, ARG_SGPR, ctx->i32);
add_arg_assign(fninfo, ARG_SGPR, ctx->i32, &ctx->abi.base_vertex);
add_arg_assign(fninfo, ARG_SGPR, ctx->i32, &ctx->abi.start_instance);
add_arg_assign(fninfo, ARG_SGPR, ctx->i32, &ctx->abi.draw_id);
- ctx->param_vs_state_bits = add_arg(fninfo, ARG_SGPR, ctx->i32);
}
static void declare_vs_input_vgprs(struct si_shader_context *ctx,
/* no extra parameters */
} else {
if (shader->is_gs_copy_shader) {
- fninfo.num_params = ctx->param_rw_buffers + 1;
+ fninfo.num_params = ctx->param_vs_state_bits + 1;
fninfo.num_sgpr_params = fninfo.num_params;
}
if (ctx->type == PIPE_SHADER_VERTEX) {
declare_vs_specific_input_sgprs(ctx, &fninfo);
} else {
+ ctx->param_vs_state_bits = add_arg(&fninfo, ARG_SGPR, ctx->i32);
ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32);
ctx->param_tes_offchip_addr = add_arg(&fninfo, ARG_SGPR, ctx->i32);
- if (!HAVE_32BIT_POINTERS) {
- /* Declare as many input SGPRs as the VS has. */
+ /* Declare as many input SGPRs as the VS has. */
+ if (!HAVE_32BIT_POINTERS)
add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */
- ctx->param_vs_state_bits = add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */
- }
}
if (!HAVE_32BIT_POINTERS) {
case PIPE_SHADER_TESS_EVAL:
declare_global_desc_pointers(ctx, &fninfo);
declare_per_stage_desc_pointers(ctx, &fninfo, true);
+ ctx->param_vs_state_bits = add_arg(&fninfo, ARG_SGPR, ctx->i32);
ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32);
ctx->param_tes_offchip_addr = add_arg(&fninfo, ARG_SGPR, ctx->i32);
static int si_compile_llvm(struct si_screen *sscreen,
struct ac_shader_binary *binary,
struct si_shader_config *conf,
- struct si_compiler *compiler,
+ struct ac_llvm_compiler *compiler,
LLVMModuleRef mod,
struct pipe_debug_callback *debug,
unsigned processor,
/* Generate code for the hardware VS shader stage to go with a geometry shader */
struct si_shader *
si_generate_gs_copy_shader(struct si_screen *sscreen,
- struct si_compiler *compiler,
+ struct ac_llvm_compiler *compiler,
struct si_shader_selector *gs_selector,
struct pipe_debug_callback *debug)
{
struct si_shader_context ctx;
struct si_shader *shader;
LLVMBuilderRef builder;
- struct si_shader_output_values *outputs;
+ struct si_shader_output_values outputs[SI_MAX_VS_OUTPUTS];
struct tgsi_shader_info *gsinfo = &gs_selector->info;
int i, r;
- outputs = MALLOC(gsinfo->num_outputs * sizeof(outputs[0]));
-
- if (!outputs)
- return NULL;
shader = CALLOC_STRUCT(si_shader);
- if (!shader) {
- FREE(outputs);
+ if (!shader)
return NULL;
- }
/* We can leave the fence as permanently signaled because the GS copy
* shader only becomes visible globally after it has been compiled. */
stream);
}
- if (stream == 0)
+ if (stream == 0) {
+ /* Vertex color clamping.
+ *
+ * This uses a state constant loaded in a user data SGPR and
+ * an IF statement is added that clamps all colors if the constant
+ * is true.
+ */
+ struct lp_build_if_state if_ctx;
+ LLVMValueRef v[2], cond = NULL;
+ LLVMBasicBlockRef blocks[2];
+
+ for (unsigned i = 0; i < gsinfo->num_outputs; i++) {
+ if (gsinfo->output_semantic_name[i] != TGSI_SEMANTIC_COLOR &&
+ gsinfo->output_semantic_name[i] != TGSI_SEMANTIC_BCOLOR)
+ continue;
+
+ /* We've found a color. */
+ if (!cond) {
+ /* The state is in the first bit of the user SGPR. */
+ cond = LLVMGetParam(ctx.main_fn,
+ ctx.param_vs_state_bits);
+ cond = LLVMBuildTrunc(ctx.ac.builder, cond,
+ ctx.i1, "");
+ lp_build_if(&if_ctx, &ctx.gallivm, cond);
+ /* Remember blocks for Phi. */
+ blocks[0] = if_ctx.true_block;
+ blocks[1] = if_ctx.entry_block;
+ }
+
+ for (unsigned j = 0; j < 4; j++) {
+ /* Insert clamp into the true block. */
+ v[0] = ac_build_clamp(&ctx.ac, outputs[i].values[j]);
+ v[1] = outputs[i].values[j];
+
+ /* Insert Phi into the endif block. */
+ LLVMPositionBuilderAtEnd(ctx.ac.builder, if_ctx.merge_block);
+ outputs[i].values[j] = ac_build_phi(&ctx.ac, ctx.f32, 2, v, blocks);
+ LLVMPositionBuilderAtEnd(ctx.ac.builder, if_ctx.true_block);
+ }
+ }
+ if (cond)
+ lp_build_endif(&if_ctx);
+
si_llvm_export_vs(&ctx, outputs, gsinfo->num_outputs);
+ }
LLVMBuildBr(builder, end_bb);
}
r = si_compile_llvm(sscreen, &ctx.shader->binary,
&ctx.shader->config, ctx.compiler,
- ctx.gallivm.module,
+ ctx.ac.module,
debug, PIPE_SHADER_GEOMETRY,
"GS Copy Shader");
if (!r) {
si_llvm_dispose(&ctx);
- FREE(outputs);
-
if (r != 0) {
FREE(shader);
shader = NULL;
static void si_init_shader_ctx(struct si_shader_context *ctx,
struct si_screen *sscreen,
- struct si_compiler *compiler)
+ struct ac_llvm_compiler *compiler)
{
struct lp_build_tgsi_context *bld_base;
LLVMGetParam(ctx->main_fn, param),
LLVMConstInt(ctx->i32, bitoffset, 0),
};
- lp_build_intrinsic(ctx->ac.builder,
+ ac_build_intrinsic(&ctx->ac,
"llvm.amdgcn.init.exec.from.input",
- ctx->voidt, args, 2, LP_FUNC_ATTR_CONVERGENT);
+ ctx->voidt, args, 2, AC_FUNC_ATTR_CONVERGENT);
}
static bool si_vs_needs_prolog(const struct si_shader_selector *sel,
if (!shader->is_monolithic)
ac_init_exec_full_mask(&ctx->ac);
- /* The barrier must execute for all shaders in a
- * threadgroup.
- */
- si_llvm_emit_barrier(NULL, bld_base, NULL);
-
LLVMValueRef num_threads = si_unpack_param(ctx, ctx->param_merged_wave_info, 8, 8);
LLVMValueRef ena =
LLVMBuildICmp(ctx->ac.builder, LLVMIntULT,
ac_get_thread_id(&ctx->ac), num_threads, "");
lp_build_if(&ctx->merged_wrap_if_state, &ctx->gallivm, ena);
+
+ /* The barrier must execute for all shaders in a
+ * threadgroup.
+ *
+ * Execute the barrier inside the conditional block,
+ * so that empty waves can jump directly to s_endpgm,
+ * which will also signal the barrier.
+ *
+ * If the shader is TCS and the TCS epilog is present
+ * and contains a barrier, it will wait there and then
+ * reach s_endpgm.
+ */
+ si_llvm_emit_barrier(NULL, bld_base, NULL);
}
}
sel->tcs_info.tessfactors_are_def_in_all_invocs) {
for (unsigned i = 0; i < 6; i++) {
ctx->invoc0_tess_factors[i] =
- lp_build_alloca_undef(&ctx->gallivm, ctx->i32, "");
+ ac_build_alloca_undef(&ctx->ac, ctx->i32, "");
}
}
int i;
for (i = 0; i < 4; i++) {
ctx->gs_next_vertex[i] =
- lp_build_alloca(&ctx->gallivm,
- ctx->i32, "");
+ ac_build_alloca(&ctx->ac, ctx->i32, "");
}
}
if (sel->force_correct_derivs_after_kill) {
- ctx->postponed_kill = lp_build_alloca_undef(&ctx->gallivm, ctx->i1, "");
+ ctx->postponed_kill = ac_build_alloca_undef(&ctx->ac, ctx->i1, "");
/* true = don't kill. */
LLVMBuildStore(ctx->ac.builder, LLVMConstInt(ctx->i1, 1, 0),
ctx->postponed_kill);
si_init_function_info(&fninfo);
for (unsigned i = 0; i < num_parts; ++i) {
- lp_add_function_attr(parts[i], -1, LP_FUNC_ATTR_ALWAYSINLINE);
+ ac_add_function_attr(ctx->ac.context, parts[i], -1,
+ AC_FUNC_ATTR_ALWAYSINLINE);
LLVMSetLinkage(parts[i], LLVMPrivateLinkage);
}
param_size = ac_get_type_size(param_type) / 4;
is_sgpr = ac_is_sgpr_param(param);
- if (is_sgpr)
- lp_add_function_attr(parts[part], param_idx + 1, LP_FUNC_ATTR_INREG);
- else if (out_idx < num_out_sgpr) {
+ if (is_sgpr) {
+ ac_add_function_attr(ctx->ac.context, parts[part],
+ param_idx + 1, AC_FUNC_ATTR_INREG);
+ } else if (out_idx < num_out_sgpr) {
/* Skip returned SGPRs the current part doesn't
* declare on the input. */
out_idx = num_out_sgpr;
}
int si_compile_tgsi_shader(struct si_screen *sscreen,
- struct si_compiler *compiler,
+ struct ac_llvm_compiler *compiler,
struct si_shader *shader,
struct pipe_debug_callback *debug)
{
/* Compile to bytecode. */
r = si_compile_llvm(sscreen, &shader->binary, &shader->config, compiler,
- ctx.gallivm.module, debug, ctx.type, "TGSI shader");
+ ctx.ac.module, debug, ctx.type, "TGSI shader");
si_llvm_dispose(&ctx);
if (r) {
fprintf(stderr, "LLVM failed to compile shader\n");
enum pipe_shader_type type,
bool prolog,
union si_shader_part_key *key,
- struct si_compiler *compiler,
+ struct ac_llvm_compiler *compiler,
struct pipe_debug_callback *debug,
void (*build)(struct si_shader_context *,
union si_shader_part_key *),
}
static bool si_get_vs_prolog(struct si_screen *sscreen,
- struct si_compiler *compiler,
+ struct ac_llvm_compiler *compiler,
struct si_shader *shader,
struct pipe_debug_callback *debug,
struct si_shader *main_part,
* Select and compile (or reuse) vertex shader parts (prolog & epilog).
*/
static bool si_shader_select_vs_parts(struct si_screen *sscreen,
- struct si_compiler *compiler,
+ struct ac_llvm_compiler *compiler,
struct si_shader *shader,
struct pipe_debug_callback *debug)
{
* Select and compile (or reuse) TCS parts (epilog).
*/
static bool si_shader_select_tcs_parts(struct si_screen *sscreen,
- struct si_compiler *compiler,
+ struct ac_llvm_compiler *compiler,
struct si_shader *shader,
struct pipe_debug_callback *debug)
{
* Select and compile (or reuse) GS parts (prolog).
*/
static bool si_shader_select_gs_parts(struct si_screen *sscreen,
- struct si_compiler *compiler,
+ struct ac_llvm_compiler *compiler,
struct si_shader *shader,
struct pipe_debug_callback *debug)
{
* Select and compile (or reuse) pixel shader parts (prolog & epilog).
*/
static bool si_shader_select_ps_parts(struct si_screen *sscreen,
- struct si_compiler *compiler,
+ struct ac_llvm_compiler *compiler,
struct si_shader *shader,
struct pipe_debug_callback *debug)
{
}
}
-int si_shader_create(struct si_screen *sscreen, struct si_compiler *compiler,
+int si_shader_create(struct si_screen *sscreen, struct ac_llvm_compiler *compiler,
struct si_shader *shader,
struct pipe_debug_callback *debug)
{