static void gather_intrinsic_load_deref_input_info(const nir_shader *nir,
const nir_intrinsic_instr *instr,
const nir_deref_instr *deref,
- struct tgsi_shader_info *info)
+ struct si_shader_info *info)
{
switch (nir->info.stage) {
case MESA_SHADER_VERTEX:
static void gather_intrinsic_load_deref_output_info(const nir_shader *nir,
const nir_intrinsic_instr *instr,
nir_variable *var,
- struct tgsi_shader_info *info)
+ struct si_shader_info *info)
{
assert(var && var->data.mode == nir_var_shader_out);
static void gather_intrinsic_store_deref_output_info(const nir_shader *nir,
const nir_intrinsic_instr *instr,
const nir_deref_instr *deref,
- struct tgsi_shader_info *info)
+ struct si_shader_info *info)
{
switch (nir->info.stage) {
case MESA_SHADER_VERTEX: /* needed by LS, ES */
}
static void scan_instruction(const struct nir_shader *nir,
- struct tgsi_shader_info *info,
+ struct si_shader_info *info,
nir_instr *instr)
{
if (instr->type == nir_instr_type_alu) {
case nir_intrinsic_load_num_work_groups:
info->uses_grid_size = true;
break;
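+ /* These values are derived from the wave's position within the
+ * workgroup, so the compiler has to be told to keep that info
+ * available (hence the dedicated flag below). */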
+ case nir_intrinsic_load_local_invocation_index:
+ case nir_intrinsic_load_subgroup_id:
+ case nir_intrinsic_load_num_subgroups:
+ info->uses_subgroup_info = true;
+ break;
case nir_intrinsic_load_local_group_size:
/* A fixed block size is lowered to immediates, so only a variable block size is read at runtime. */
if (info->properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] == 0)
}
}
-void si_nir_scan_tess_ctrl(const struct nir_shader *nir,
- struct tgsi_tessctrl_info *out)
-{
- memset(out, 0, sizeof(*out));
-
- if (nir->info.stage != MESA_SHADER_TESS_CTRL)
- return;
-
- out->tessfactors_are_def_in_all_invocs =
- ac_are_tessfactors_def_in_all_invocs(nir);
-}
-
static void scan_output_slot(const nir_variable *var,
unsigned var_idx,
unsigned component, unsigned num_components,
- struct tgsi_shader_info *info)
+ struct si_shader_info *info)
{
assert(component + num_components <= 4);
assert(component < 4);
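+ /* usagemask marks which of the 4 slot components are written,
+ * e.g. component = 1 with num_components = 2 gives 0b0110. */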
ubyte usagemask = ((1 << num_components) - 1) << component;
unsigned gs_out_streams;
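+ /* With NIR_STREAM_PACKED set, var->data.stream encodes a 2-bit
+ * stream index per component rather than one stream for the whole
+ * variable. */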
- if (var->data.stream & (1u << 31)) {
- gs_out_streams = var->data.stream & ~(1u << 31);
+ if (var->data.stream & NIR_STREAM_PACKED) {
+ gs_out_streams = var->data.stream & ~NIR_STREAM_PACKED;
} else {
assert(var->data.stream < 4);
gs_out_streams = 0;
static void scan_output_helper(const nir_variable *var,
unsigned location,
const struct glsl_type *type,
- struct tgsi_shader_info *info)
+ struct si_shader_info *info)
{
- if (glsl_type_is_struct(type)) {
+ if (glsl_type_is_struct(type) || glsl_type_is_interface(type)) {
for (unsigned i = 0; i < glsl_get_length(type); i++) {
const struct glsl_type *ft = glsl_get_struct_field(type, i);
scan_output_helper(var, location, ft, info);
}
void si_nir_scan_shader(const struct nir_shader *nir,
- struct tgsi_shader_info *info)
+ struct si_shader_info *info)
{
nir_function *func;
unsigned i;
if (nir->num_uniforms > 0)
info->const_buffers_declared |= 1;
info->images_declared = u_bit_consecutive(0, nir->info.num_images);
- info->samplers_declared = u_bit_consecutive(0, nir->info.num_textures);
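+ /* MSAA images are tracked separately because they also need FMASK
+ * descriptors; textures_used is a bitmask of the sampler slots the
+ * shader actually references, so gaps in the binding range are not
+ * declared (assumed intent of the change). */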
+ info->msaa_images_declared = u_bit_consecutive(0, nir->info.last_msaa_image + 1);
+ info->samplers_declared = nir->info.textures_used;
info->num_written_clipdistance = nir->info.clip_distance_array_size;
info->num_written_culldistance = nir->info.cull_distance_array_size;
if (info->processor == PIPE_SHADER_FRAGMENT)
info->uses_kill = nir->info.fs.uses_discard;
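+ /* Whether every TCS invocation (conditionally) writes the tess
+ * factors affects how the driver gathers them later; this was
+ * previously computed by the now-removed si_nir_scan_tess_ctrl. */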
+ if (nir->info.stage == MESA_SHADER_TESS_CTRL) {
+ info->tessfactors_are_def_in_all_invocs =
+ ac_are_tessfactors_def_in_all_invocs(nir);
+ }
+
func = (struct nir_function *)exec_list_get_head_const(&nir->functions);
nir_foreach_block(block, func->impl) {
nir_foreach_instr(instr, block)
}
}
-void
+static void
si_nir_opts(struct nir_shader *nir)
{
bool progress;
- unsigned lower_flrp =
- (nir->options->lower_flrp16 ? 16 : 0) |
- (nir->options->lower_flrp32 ? 32 : 0) |
- (nir->options->lower_flrp64 ? 64 : 0);
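+ /* Repeat the cleanup passes until they stop making progress. */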
do {
progress = false;
NIR_PASS(progress, nir, nir_opt_copy_prop_vars);
NIR_PASS(progress, nir, nir_opt_dead_write_vars);
- NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL);
+ NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL, NULL);
NIR_PASS_V(nir, nir_lower_phis_to_scalar);
/* (Constant) copy propagation is needed for txf with offsets. */
NIR_PASS(progress, nir, nir_opt_algebraic);
NIR_PASS(progress, nir, nir_opt_constant_folding);
- if (lower_flrp != 0) {
+ if (!nir->info.flrp_lowered) {
+ unsigned lower_flrp =
+ (nir->options->lower_flrp16 ? 16 : 0) |
+ (nir->options->lower_flrp32 ? 32 : 0) |
+ (nir->options->lower_flrp64 ? 64 : 0);
+ assert(lower_flrp);
bool lower_flrp_progress = false;
NIR_PASS(lower_flrp_progress, nir, nir_lower_flrp,
/* Nothing should rematerialize any flrps, so we only
* need to do this lowering once.
*/
- lower_flrp = 0;
+ nir->info.flrp_lowered = true;
}
NIR_PASS(progress, nir, nir_opt_undef);
}
}
-void si_nir_lower_ps_inputs(struct nir_shader *nir)
+static void si_nir_lower_ps_inputs(struct nir_shader *nir)
{
if (nir->info.stage != MESA_SHADER_FRAGMENT)
return;
nir_var_shader_in);
}
-/**
- * Perform "lowering" operations on the NIR that are run once when the shader
- * selector is created.
- */
-void si_lower_nir(struct si_shader_selector *sel)
+void si_nir_adjust_driver_locations(struct nir_shader *nir)
{
/* Adjust the driver location of inputs and outputs. The state tracker
* interprets them as slots, while the ac/nir backend interprets them
* as individual components.
*/
- if (sel->nir->info.stage != MESA_SHADER_FRAGMENT) {
- nir_foreach_variable(variable, &sel->nir->inputs)
+ if (nir->info.stage != MESA_SHADER_FRAGMENT) {
+ nir_foreach_variable(variable, &nir->inputs)
variable->data.driver_location *= 4;
}
- nir_foreach_variable(variable, &sel->nir->outputs) {
+ nir_foreach_variable(variable, &nir->outputs)
variable->data.driver_location *= 4;
+}
- if (sel->nir->info.stage == MESA_SHADER_FRAGMENT) {
- if (variable->data.location == FRAG_RESULT_DEPTH)
- variable->data.driver_location += 2;
- else if (variable->data.location == FRAG_RESULT_STENCIL)
- variable->data.driver_location += 1;
- }
- }
-
+/**
+ * Perform "lowering" operations on the NIR that are run once when the shader
+ * selector is created.
+ */
+static void si_lower_nir(struct si_screen *sscreen, struct nir_shader *nir)
+{
/* Perform lowerings (and optimizations) of code.
*
* Performance considerations aside, we must:
static const struct nir_lower_tex_options lower_tex_options = {
.lower_txp = ~0u,
};
- NIR_PASS_V(sel->nir, nir_lower_tex, &lower_tex_options);
+ NIR_PASS_V(nir, nir_lower_tex, &lower_tex_options);
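+ /* GCN wavefronts are 64 lanes wide, so subgroup operations are
+ * lowered assuming a fixed subgroup size of 64. */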
const nir_lower_subgroups_options subgroups_options = {
.subgroup_size = 64,
.lower_vote_trivial = false,
.lower_vote_eq_to_ballot = true,
};
- NIR_PASS_V(sel->nir, nir_lower_subgroups, &subgroups_options);
+ NIR_PASS_V(nir, nir_lower_subgroups, &subgroups_options);
+
+ /* Lower load constants to scalar and then clean up the mess */
+ NIR_PASS_V(nir, nir_lower_load_const_to_scalar);
+ NIR_PASS_V(nir, nir_lower_var_copies);
+ NIR_PASS_V(nir, nir_lower_pack);
+ NIR_PASS_V(nir, nir_opt_access);
+ si_nir_opts(nir);
+
+ /* Lower large, always-constant variables to load_constant
+ * intrinsics, which are turned into PC-relative loads from a data
+ * section next to the shader.
+ *
+ * st/mesa calls finalize_nir twice, but we can't call this pass twice.
+ */
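+ /* Illustrative example (not from the patch): a lookup table such as
+ * "const float t[256] = {...}; x = t[i];" stops being materialized
+ * with per-element moves and becomes a single load from the
+ * shader's constant data section. */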
+ bool changed = false;
+ if (!nir->constant_data) {
+ NIR_PASS(changed, nir, nir_opt_large_constants,
+ glsl_get_natural_size_align_bytes, 16);
+ }
- ac_lower_indirect_derefs(sel->nir, sel->screen->info.chip_class);
+ changed |= ac_lower_indirect_derefs(nir, sscreen->info.chip_class);
+ if (changed)
+ si_nir_opts(nir);
- si_nir_opts(sel->nir);
+ NIR_PASS_V(nir, nir_lower_bool_to_int32);
+ NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp);
+}
- NIR_PASS_V(sel->nir, nir_lower_bool_to_int32);
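+/* This signature matches the pipe_screen::finalize_nir hook; see the
+ * note above about st/mesa calling it more than once. */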
+void si_finalize_nir(struct pipe_screen *screen, void *nirptr, bool optimize)
+{
+ struct si_screen *sscreen = (struct si_screen *)screen;
+ struct nir_shader *nir = (struct nir_shader *)nirptr;
- /* Strip the resulting shader so that the shader cache is more likely
- * to hit from other similar shaders.
- */
- nir_strip(sel->nir);
+ nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
+ si_nir_lower_ps_inputs(nir);
+ si_lower_nir(sscreen, nir);
}
static void declare_nir_input_vs(struct si_shader_context *ctx,
case INTERP_MODE_SMOOTH:
case INTERP_MODE_NONE:
if (location == INTERP_CENTER)
- return ctx->abi.persp_center;
+ return ac_get_arg(&ctx->ac, ctx->args.persp_center);
else if (location == INTERP_CENTROID)
return ctx->abi.persp_centroid;
else if (location == INTERP_SAMPLE)
- return ctx->abi.persp_sample;
+ return ac_get_arg(&ctx->ac, ctx->args.persp_sample);
break;
case INTERP_MODE_NOPERSPECTIVE:
if (location == INTERP_CENTER)
- return ctx->abi.linear_center;
+ return ac_get_arg(&ctx->ac, ctx->args.linear_center);
else if (location == INTERP_CENTROID)
- return ctx->abi.linear_centroid;
+ return ac_get_arg(&ctx->ac, ctx->args.linear_centroid);
else if (location == INTERP_SAMPLE)
- return ctx->abi.linear_sample;
+ return ac_get_arg(&ctx->ac, ctx->args.linear_sample);
break;
default:
assert(!"Unhandled interpolation mode.");
unsigned const_index = base_index + constant_index;
assert(!descriptor_set);
- assert(!image || desc_type == AC_DESC_IMAGE || desc_type == AC_DESC_BUFFER);
+ assert(desc_type <= AC_DESC_BUFFER);
if (bindless) {
- LLVMValueRef list =
- LLVMGetParam(ctx->main_fn, ctx->param_bindless_samplers_and_images);
+ LLVMValueRef list = ac_get_arg(&ctx->ac, ctx->bindless_samplers_and_images);
/* dynamic_index is the bindless handle */
if (image) {
- /* For simplicity, bindless image descriptors use fixed
- * 16-dword slots for now.
- */
+ /* Bindless image descriptors use 16-dword slots. */
dynamic_index = LLVMBuildMul(ctx->ac.builder, dynamic_index,
LLVMConstInt(ctx->i64, 2, 0), "");
+ /* FMASK is right after the image. */
+ if (desc_type == AC_DESC_FMASK) {
+ dynamic_index = LLVMBuildAdd(ctx->ac.builder, dynamic_index,
+ ctx->i32_1, "");
+ }
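+ /* Net layout: each bindless image occupies two consecutive 8-dword
+ * list entries, the image descriptor first and its FMASK second,
+ * which is what the *2 and +1 above implement. */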
return si_load_image_desc(ctx, list, dynamic_index, desc_type,
write, true);
unsigned num_slots = image ? ctx->num_images : ctx->num_samplers;
assert(const_index < num_slots || dynamic_index);
- LLVMValueRef list = LLVMGetParam(ctx->main_fn, ctx->param_samplers_and_images);
+ LLVMValueRef list = ac_get_arg(&ctx->ac, ctx->samplers_and_images);
LLVMValueRef index = LLVMConstInt(ctx->ac.i32, const_index, false);
if (dynamic_index) {
}
if (image) {
+ /* FMASKs are separate from images. */
+ if (desc_type == AC_DESC_FMASK) {
+ index = LLVMBuildAdd(ctx->ac.builder, index,
+ LLVMConstInt(ctx->i32, SI_NUM_IMAGES, 0), "");
+ }
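+ /* The image descriptor array is stored in reverse order, so the
+ * index has to be flipped before loading. */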
index = LLVMBuildSub(ctx->ac.builder,
- LLVMConstInt(ctx->i32, SI_NUM_IMAGES - 1, 0),
+ LLVMConstInt(ctx->i32, SI_NUM_IMAGE_SLOTS - 1, 0),
index, "");
return si_load_image_desc(ctx, list, index, desc_type, write, false);
}
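+ /* Samplers follow the image slots in the combined descriptor list;
+ * an image descriptor is half the size of a sampler descriptor,
+ * which is presumably why the offset is SI_NUM_IMAGE_SLOTS / 2. */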
index = LLVMBuildAdd(ctx->ac.builder, index,
- LLVMConstInt(ctx->i32, SI_NUM_IMAGES / 2, 0), "");
+ LLVMConstInt(ctx->i32, SI_NUM_IMAGE_SLOTS / 2, 0), "");
return si_load_sampler_desc(ctx, list, index, desc_type);
}
bool si_nir_build_llvm(struct si_shader_context *ctx, struct nir_shader *nir)
{
- struct tgsi_shader_info *info = &ctx->shader->selector->info;
+ struct si_shader_info *info = &ctx->shader->selector->info;
if (nir->info.stage == MESA_SHADER_VERTEX) {
uint64_t processed_inputs = 0;
ctx->shader->key.mono.u.ps.interpolate_at_sample_force_center;
} else if (nir->info.stage == MESA_SHADER_COMPUTE) {
if (nir->info.cs.user_data_components_amd) {
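+ /* user_data_components_amd are small constants passed in user
+ * SGPRs; they are padded to a vec4 below so loads of any component
+ * stay in bounds (assumed rationale). */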
- ctx->abi.user_data = LLVMGetParam(ctx->main_fn, ctx->param_cs_user_data);
+ ctx->abi.user_data = ac_get_arg(&ctx->ac, ctx->cs_user_data);
ctx->abi.user_data = ac_build_expand_to_vec4(&ctx->ac, ctx->abi.user_data,
nir->info.cs.user_data_components_amd);
}
assert(gl_shader_stage_is_compute(nir->info.stage));
si_declare_compute_memory(ctx);
}
- ac_nir_translate(&ctx->ac, &ctx->abi, nir);
+ ac_nir_translate(&ctx->ac, &ctx->abi, &ctx->args, nir);
return true;
}