X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fradeonsi%2Fsi_shader_nir.c;h=6aa85ef6c7b25920dadeee92f5837255cef38b5c;hb=03950473df0c8dbcda9ceaa888704cc22d543950;hp=e97e5ccb07b2653806ee8c9c103fe9f6c75814e5;hpb=dd4cc56ebd05074848b1817493f5058e0c1cd9e9;p=mesa.git diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c b/src/gallium/drivers/radeonsi/si_shader_nir.c index e97e5ccb07b..6aa85ef6c7b 100644 --- a/src/gallium/drivers/radeonsi/si_shader_nir.c +++ b/src/gallium/drivers/radeonsi/si_shader_nir.c @@ -128,7 +128,7 @@ static void gather_usage(const nir_deref_instr *deref, static void gather_intrinsic_load_deref_input_info(const nir_shader *nir, const nir_intrinsic_instr *instr, const nir_deref_instr *deref, - struct tgsi_shader_info *info) + struct si_shader_info *info) { switch (nir->info.stage) { case MESA_SHADER_VERTEX: @@ -141,7 +141,7 @@ static void gather_intrinsic_load_deref_input_info(const nir_shader *nir, static void gather_intrinsic_load_deref_output_info(const nir_shader *nir, const nir_intrinsic_instr *instr, nir_variable *var, - struct tgsi_shader_info *info) + struct si_shader_info *info) { assert(var && var->data.mode == nir_var_shader_out); @@ -167,7 +167,7 @@ static void gather_intrinsic_load_deref_output_info(const nir_shader *nir, static void gather_intrinsic_store_deref_output_info(const nir_shader *nir, const nir_intrinsic_instr *instr, const nir_deref_instr *deref, - struct tgsi_shader_info *info) + struct si_shader_info *info) { switch (nir->info.stage) { case MESA_SHADER_VERTEX: /* needed by LS, ES */ @@ -181,7 +181,7 @@ static void gather_intrinsic_store_deref_output_info(const nir_shader *nir, } static void scan_instruction(const struct nir_shader *nir, - struct tgsi_shader_info *info, + struct si_shader_info *info, nir_instr *instr) { if (instr->type == nir_instr_type_alu) { @@ -236,6 +236,11 @@ static void scan_instruction(const struct nir_shader *nir, case nir_intrinsic_load_num_work_groups: info->uses_grid_size = true; break; + case nir_intrinsic_load_local_invocation_index: + case nir_intrinsic_load_subgroup_id: + case nir_intrinsic_load_num_subgroups: + info->uses_subgroup_info = true; + break; case nir_intrinsic_load_local_group_size: /* The block size is translated to IMM with a fixed block size. */ if (info->properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] == 0) @@ -425,22 +430,10 @@ static void scan_instruction(const struct nir_shader *nir, } } -void si_nir_scan_tess_ctrl(const struct nir_shader *nir, - struct tgsi_tessctrl_info *out) -{ - memset(out, 0, sizeof(*out)); - - if (nir->info.stage != MESA_SHADER_TESS_CTRL) - return; - - out->tessfactors_are_def_in_all_invocs = - ac_are_tessfactors_def_in_all_invocs(nir); -} - static void scan_output_slot(const nir_variable *var, unsigned var_idx, unsigned component, unsigned num_components, - struct tgsi_shader_info *info) + struct si_shader_info *info) { assert(component + num_components <= 4); assert(component < 4); @@ -466,8 +459,8 @@ static void scan_output_slot(const nir_variable *var, ubyte usagemask = ((1 << num_components) - 1) << component; unsigned gs_out_streams; - if (var->data.stream & (1u << 31)) { - gs_out_streams = var->data.stream & ~(1u << 31); + if (var->data.stream & NIR_STREAM_PACKED) { + gs_out_streams = var->data.stream & ~NIR_STREAM_PACKED; } else { assert(var->data.stream < 4); gs_out_streams = 0; @@ -540,9 +533,9 @@ static void scan_output_slot(const nir_variable *var, static void scan_output_helper(const nir_variable *var, unsigned location, const struct glsl_type *type, - struct tgsi_shader_info *info) + struct si_shader_info *info) { - if (glsl_type_is_struct(type)) { + if (glsl_type_is_struct(type) || glsl_type_is_interface(type)) { for (unsigned i = 0; i < glsl_get_length(type); i++) { const struct glsl_type *ft = glsl_get_struct_field(type, i); scan_output_helper(var, location, ft, info); @@ -586,7 +579,7 @@ static void scan_output_helper(const nir_variable *var, } void si_nir_scan_shader(const struct nir_shader *nir, - struct tgsi_shader_info *info) + struct si_shader_info *info) { nir_function *func; unsigned i; @@ -794,6 +787,11 @@ void si_nir_scan_shader(const struct nir_shader *nir, if (info->processor == PIPE_SHADER_FRAGMENT) info->uses_kill = nir->info.fs.uses_discard; + if (nir->info.stage == MESA_SHADER_TESS_CTRL) { + info->tessfactors_are_def_in_all_invocs = + ac_are_tessfactors_def_in_all_invocs(nir); + } + func = (struct nir_function *)exec_list_get_head_const(&nir->functions); nir_foreach_block(block, func->impl) { nir_foreach_instr(instr, block) @@ -801,14 +799,10 @@ void si_nir_scan_shader(const struct nir_shader *nir, } } -void +static void si_nir_opts(struct nir_shader *nir) { bool progress; - unsigned lower_flrp = - (nir->options->lower_flrp16 ? 16 : 0) | - (nir->options->lower_flrp32 ? 32 : 0) | - (nir->options->lower_flrp64 ? 64 : 0); do { progress = false; @@ -839,7 +833,12 @@ si_nir_opts(struct nir_shader *nir) NIR_PASS(progress, nir, nir_opt_algebraic); NIR_PASS(progress, nir, nir_opt_constant_folding); - if (lower_flrp != 0) { + if (!nir->info.flrp_lowered) { + unsigned lower_flrp = + (nir->options->lower_flrp16 ? 16 : 0) | + (nir->options->lower_flrp32 ? 32 : 0) | + (nir->options->lower_flrp64 ? 64 : 0); + assert(lower_flrp); bool lower_flrp_progress = false; NIR_PASS(lower_flrp_progress, nir, nir_lower_flrp, @@ -855,7 +854,7 @@ si_nir_opts(struct nir_shader *nir) /* Nothing should rematerialize any flrps, so we only * need to do this lowering once. */ - lower_flrp = 0; + nir->info.flrp_lowered = true; } NIR_PASS(progress, nir, nir_opt_undef); @@ -913,7 +912,7 @@ si_nir_lower_color(nir_shader *nir) } } -void si_nir_lower_ps_inputs(struct nir_shader *nir) +static void si_nir_lower_ps_inputs(struct nir_shader *nir) { if (nir->info.stage != MESA_SHADER_FRAGMENT) return; @@ -938,32 +937,27 @@ void si_nir_lower_ps_inputs(struct nir_shader *nir) nir_var_shader_in); } -/** - * Perform "lowering" operations on the NIR that are run once when the shader - * selector is created. - */ -void si_lower_nir(struct si_shader_selector *sel) +void si_nir_adjust_driver_locations(struct nir_shader *nir) { /* Adjust the driver location of inputs and outputs. The state tracker * interprets them as slots, while the ac/nir backend interprets them * as individual components. */ - if (sel->nir->info.stage != MESA_SHADER_FRAGMENT) { - nir_foreach_variable(variable, &sel->nir->inputs) + if (nir->info.stage != MESA_SHADER_FRAGMENT) { + nir_foreach_variable(variable, &nir->inputs) variable->data.driver_location *= 4; } - nir_foreach_variable(variable, &sel->nir->outputs) { + nir_foreach_variable(variable, &nir->outputs) variable->data.driver_location *= 4; +} - if (sel->nir->info.stage == MESA_SHADER_FRAGMENT) { - if (variable->data.location == FRAG_RESULT_DEPTH) - variable->data.driver_location += 2; - else if (variable->data.location == FRAG_RESULT_STENCIL) - variable->data.driver_location += 1; - } - } - +/** + * Perform "lowering" operations on the NIR that are run once when the shader + * selector is created. + */ +static void si_lower_nir(struct si_screen *sscreen, struct nir_shader *nir) +{ /* Perform lowerings (and optimizations) of code. * * Performance considerations aside, we must: @@ -975,7 +969,7 @@ void si_lower_nir(struct si_shader_selector *sel) static const struct nir_lower_tex_options lower_tex_options = { .lower_txp = ~0u, }; - NIR_PASS_V(sel->nir, nir_lower_tex, &lower_tex_options); + NIR_PASS_V(nir, nir_lower_tex, &lower_tex_options); const nir_lower_subgroups_options subgroups_options = { .subgroup_size = 64, @@ -985,25 +979,43 @@ void si_lower_nir(struct si_shader_selector *sel) .lower_vote_trivial = false, .lower_vote_eq_to_ballot = true, }; - NIR_PASS_V(sel->nir, nir_lower_subgroups, &subgroups_options); + NIR_PASS_V(nir, nir_lower_subgroups, &subgroups_options); /* Lower load constants to scalar and then clean up the mess */ - NIR_PASS_V(sel->nir, nir_lower_load_const_to_scalar); - NIR_PASS_V(sel->nir, nir_lower_var_copies); - si_nir_opts(sel->nir); + NIR_PASS_V(nir, nir_lower_load_const_to_scalar); + NIR_PASS_V(nir, nir_lower_var_copies); + NIR_PASS_V(nir, nir_lower_pack); + NIR_PASS_V(nir, nir_opt_access); + si_nir_opts(nir); /* Lower large variables that are always constant with load_constant * intrinsics, which get turned into PC-relative loads from a data * section next to the shader. + * + * st/mesa calls finalize_nir twice, but we can't call this pass twice. */ - NIR_PASS_V(sel->nir, nir_opt_large_constants, - glsl_get_natural_size_align_bytes, 16); + bool changed = false; + if (!nir->constant_data) { + NIR_PASS(changed, nir, nir_opt_large_constants, + glsl_get_natural_size_align_bytes, 16); + } - ac_lower_indirect_derefs(sel->nir, sel->screen->info.chip_class); + changed |= ac_lower_indirect_derefs(nir, sscreen->info.chip_class); + if (changed) + si_nir_opts(nir); - si_nir_opts(sel->nir); + NIR_PASS_V(nir, nir_lower_bool_to_int32); + NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp); +} + +void si_finalize_nir(struct pipe_screen *screen, void *nirptr, bool optimize) +{ + struct si_screen *sscreen = (struct si_screen *)screen; + struct nir_shader *nir = (struct nir_shader *)nirptr; - NIR_PASS_V(sel->nir, nir_lower_bool_to_int32); + nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir)); + si_nir_lower_ps_inputs(nir); + si_lower_nir(sscreen, nir); } static void declare_nir_input_vs(struct si_shader_context *ctx, @@ -1026,19 +1038,19 @@ si_nir_lookup_interp_param(struct ac_shader_abi *abi, case INTERP_MODE_SMOOTH: case INTERP_MODE_NONE: if (location == INTERP_CENTER) - return ctx->abi.persp_center; + return ac_get_arg(&ctx->ac, ctx->args.persp_center); else if (location == INTERP_CENTROID) return ctx->abi.persp_centroid; else if (location == INTERP_SAMPLE) - return ctx->abi.persp_sample; + return ac_get_arg(&ctx->ac, ctx->args.persp_sample); break; case INTERP_MODE_NOPERSPECTIVE: if (location == INTERP_CENTER) - return ctx->abi.linear_center; + return ac_get_arg(&ctx->ac, ctx->args.linear_center); else if (location == INTERP_CENTROID) - return ctx->abi.linear_centroid; + return ac_get_arg(&ctx->ac, ctx->args.linear_centroid); else if (location == INTERP_SAMPLE) - return ctx->abi.linear_sample; + return ac_get_arg(&ctx->ac, ctx->args.linear_sample); break; default: assert(!"Unhandled interpolation mode."); @@ -1061,8 +1073,7 @@ si_nir_load_sampler_desc(struct ac_shader_abi *abi, assert(desc_type <= AC_DESC_BUFFER); if (bindless) { - LLVMValueRef list = - LLVMGetParam(ctx->main_fn, ctx->param_bindless_samplers_and_images); + LLVMValueRef list = ac_get_arg(&ctx->ac, ctx->bindless_samplers_and_images); /* dynamic_index is the bindless handle */ if (image) { @@ -1093,7 +1104,7 @@ si_nir_load_sampler_desc(struct ac_shader_abi *abi, unsigned num_slots = image ? ctx->num_images : ctx->num_samplers; assert(const_index < num_slots || dynamic_index); - LLVMValueRef list = LLVMGetParam(ctx->main_fn, ctx->param_samplers_and_images); + LLVMValueRef list = ac_get_arg(&ctx->ac, ctx->samplers_and_images); LLVMValueRef index = LLVMConstInt(ctx->ac.i32, const_index, false); if (dynamic_index) { @@ -1140,7 +1151,7 @@ static void bitcast_inputs(struct si_shader_context *ctx, bool si_nir_build_llvm(struct si_shader_context *ctx, struct nir_shader *nir) { - struct tgsi_shader_info *info = &ctx->shader->selector->info; + struct si_shader_info *info = &ctx->shader->selector->info; if (nir->info.stage == MESA_SHADER_VERTEX) { uint64_t processed_inputs = 0; @@ -1209,7 +1220,7 @@ bool si_nir_build_llvm(struct si_shader_context *ctx, struct nir_shader *nir) ctx->shader->key.mono.u.ps.interpolate_at_sample_force_center; } else if (nir->info.stage == MESA_SHADER_COMPUTE) { if (nir->info.cs.user_data_components_amd) { - ctx->abi.user_data = LLVMGetParam(ctx->main_fn, ctx->param_cs_user_data); + ctx->abi.user_data = ac_get_arg(&ctx->ac, ctx->cs_user_data); ctx->abi.user_data = ac_build_expand_to_vec4(&ctx->ac, ctx->abi.user_data, nir->info.cs.user_data_components_amd); } @@ -1227,7 +1238,7 @@ bool si_nir_build_llvm(struct si_shader_context *ctx, struct nir_shader *nir) assert(gl_shader_stage_is_compute(nir->info.stage)); si_declare_compute_memory(ctx); } - ac_nir_translate(&ctx->ac, &ctx->abi, nir); + ac_nir_translate(&ctx->ac, &ctx->abi, &ctx->args, nir); return true; }