From b3a226691d91c4b373ff1b891be7e2c1409dfd7f Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Tue, 14 May 2019 13:29:52 +0200 Subject: [PATCH] radeonsi/nir: Use NIR barycentric intrinsics MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit This is simpler than radv, since the driver_location is already assigned for us. Reviewed-by: Bas Nieuwenhuizen Reviewed-by: Marek Olšák --- src/amd/common/ac_nir_to_llvm.c | 6 + src/amd/common/ac_shader_abi.h | 2 + src/gallium/drivers/radeonsi/si_get.c | 1 + src/gallium/drivers/radeonsi/si_shader_nir.c | 203 ++++++++++++------- 4 files changed, 143 insertions(+), 69 deletions(-) diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index 51d26e00c97..d9332700105 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -3341,6 +3341,12 @@ static void visit_intrinsic(struct ac_nir_context *ctx, case nir_intrinsic_load_helper_invocation: result = ac_build_load_helper_invocation(&ctx->ac); break; + case nir_intrinsic_load_color0: + result = ctx->abi->color0; + break; + case nir_intrinsic_load_color1: + result = ctx->abi->color1; + break; case nir_intrinsic_load_instance_id: result = ctx->abi->instance_id; break; diff --git a/src/amd/common/ac_shader_abi.h b/src/amd/common/ac_shader_abi.h index 2051f22d29b..083ab6c2298 100644 --- a/src/amd/common/ac_shader_abi.h +++ b/src/amd/common/ac_shader_abi.h @@ -63,6 +63,8 @@ struct ac_shader_abi { LLVMValueRef ancillary; LLVMValueRef sample_coverage; LLVMValueRef prim_mask; + LLVMValueRef color0; + LLVMValueRef color1; /* CS */ LLVMValueRef local_invocation_ids; LLVMValueRef num_work_groups; diff --git a/src/gallium/drivers/radeonsi/si_get.c b/src/gallium/drivers/radeonsi/si_get.c index 59d07523fbe..8e07cbe989f 100644 --- a/src/gallium/drivers/radeonsi/si_get.c +++ b/src/gallium/drivers/radeonsi/si_get.c @@ -510,6 +510,7 @@ static const struct nir_shader_compiler_options nir_options = { .lower_rotate = true, .optimize_sample_mask_in = true, .max_unroll_iterations = 32, + .use_interpolated_input_intrinsics = true, }; static const void * diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c b/src/gallium/drivers/radeonsi/si_shader_nir.c index 365c137f8fc..9ce88df8056 100644 --- a/src/gallium/drivers/radeonsi/si_shader_nir.c +++ b/src/gallium/drivers/radeonsi/si_shader_nir.c @@ -31,6 +31,7 @@ #include "compiler/nir/nir.h" #include "compiler/nir_types.h" +#include "compiler/nir/nir_builder.h" static nir_variable* tex_get_texture_var(nir_tex_instr *instr) { @@ -461,51 +462,43 @@ void si_nir_scan_shader(const struct nir_shader *nir, if (semantic_name == TGSI_SEMANTIC_PRIMID) info->uses_primid = true; - if (variable->data.sample) - info->input_interpolate_loc[i] = TGSI_INTERPOLATE_LOC_SAMPLE; - else if (variable->data.centroid) - info->input_interpolate_loc[i] = TGSI_INTERPOLATE_LOC_CENTROID; - else - info->input_interpolate_loc[i] = TGSI_INTERPOLATE_LOC_CENTER; + enum glsl_base_type base_type = + glsl_get_base_type(glsl_without_array(variable->type)); - enum glsl_base_type base_type = - glsl_get_base_type(glsl_without_array(variable->type)); + switch (variable->data.interpolation) { + case INTERP_MODE_NONE: + if (glsl_base_type_is_integer(base_type)) { + info->input_interpolate[i] = TGSI_INTERPOLATE_CONSTANT; + break; + } - switch (variable->data.interpolation) { - case INTERP_MODE_NONE: - if (glsl_base_type_is_integer(base_type)) { - info->input_interpolate[i] = TGSI_INTERPOLATE_CONSTANT; - break; - } + if (semantic_name == TGSI_SEMANTIC_COLOR) { + info->input_interpolate[i] = TGSI_INTERPOLATE_COLOR; + break; + } + /* fall-through */ - if (semantic_name == TGSI_SEMANTIC_COLOR) { - info->input_interpolate[i] = TGSI_INTERPOLATE_COLOR; - break; - } - /* fall-through */ + case INTERP_MODE_SMOOTH: + assert(!glsl_base_type_is_integer(base_type)); - case INTERP_MODE_SMOOTH: - assert(!glsl_base_type_is_integer(base_type)); + info->input_interpolate[i] = TGSI_INTERPOLATE_PERSPECTIVE; + break; - info->input_interpolate[i] = TGSI_INTERPOLATE_PERSPECTIVE; - break; + case INTERP_MODE_NOPERSPECTIVE: + assert(!glsl_base_type_is_integer(base_type)); - case INTERP_MODE_NOPERSPECTIVE: - assert(!glsl_base_type_is_integer(base_type)); + info->input_interpolate[i] = TGSI_INTERPOLATE_LINEAR; + break; - info->input_interpolate[i] = TGSI_INTERPOLATE_LINEAR; - break; - - case INTERP_MODE_FLAT: - info->input_interpolate[i] = TGSI_INTERPOLATE_CONSTANT; - break; - } + case INTERP_MODE_FLAT: + info->input_interpolate[i] = TGSI_INTERPOLATE_CONSTANT; + break; + } } } info->num_inputs = num_inputs; - i = 0; uint64_t processed_outputs = 0; unsigned num_outputs = 0; @@ -856,6 +849,53 @@ si_nir_opts(struct nir_shader *nir) } while (progress); } +static int +type_size_vec4(const struct glsl_type *type, bool bindless) +{ + return glsl_count_attribute_slots(type, false); +} + +static void +si_nir_lower_color(nir_shader *nir) +{ + nir_function_impl *entrypoint = nir_shader_get_entrypoint(nir); + + nir_builder b; + nir_builder_init(&b, entrypoint); + + nir_foreach_block(block, entrypoint) { + nir_foreach_instr_safe(instr, block) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + nir_intrinsic_instr *intrin = + nir_instr_as_intrinsic(instr); + + if (intrin->intrinsic != nir_intrinsic_load_deref) + continue; + + nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); + if (deref->mode != nir_var_shader_in) + continue; + + b.cursor = nir_before_instr(instr); + nir_variable *var = nir_deref_instr_get_variable(deref); + nir_ssa_def *def; + + if (var->data.location == VARYING_SLOT_COL0) { + def = nir_load_color0(&b); + } else if (var->data.location == VARYING_SLOT_COL1) { + def = nir_load_color1(&b); + } else { + continue; + } + + nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(def)); + nir_instr_remove(instr); + } + } +} + /** * Perform "lowering" operations on the NIR that are run once when the shader * selector is created. @@ -867,8 +907,29 @@ si_lower_nir(struct si_shader_selector* sel) * interprets them as slots, while the ac/nir backend interprets them * as individual components. */ - nir_foreach_variable(variable, &sel->nir->inputs) - variable->data.driver_location *= 4; + if (sel->nir->info.stage != MESA_SHADER_FRAGMENT) { + nir_foreach_variable(variable, &sel->nir->inputs) + variable->data.driver_location *= 4; + } else { + NIR_PASS_V(sel->nir, nir_lower_io_to_temporaries, + nir_shader_get_entrypoint(sel->nir), false, true); + + /* Since we're doing nir_lower_io_to_temporaries late, we need + * to lower all the copy_deref's introduced by + * lower_io_to_temporaries before calling nir_lower_io. + */ + NIR_PASS_V(sel->nir, nir_split_var_copies); + NIR_PASS_V(sel->nir, nir_lower_var_copies); + NIR_PASS_V(sel->nir, nir_lower_global_vars_to_local); + + si_nir_lower_color(sel->nir); + NIR_PASS_V(sel->nir, nir_lower_io, nir_var_shader_in, type_size_vec4, 0); + + /* This pass needs actual constants */ + NIR_PASS_V(sel->nir, nir_opt_constant_folding); + NIR_PASS_V(sel->nir, nir_io_add_const_offset_to_base, + nir_var_shader_in); + } nir_foreach_variable(variable, &sel->nir->outputs) { variable->data.driver_location *= 4; @@ -924,24 +985,6 @@ static void declare_nir_input_vs(struct si_shader_context *ctx, si_llvm_load_input_vs(ctx, input_index, out); } -static void declare_nir_input_fs(struct si_shader_context *ctx, - struct nir_variable *variable, - unsigned input_index, - LLVMValueRef out[4]) -{ - unsigned slot = variable->data.location; - if (slot == VARYING_SLOT_POS) { - out[0] = LLVMGetParam(ctx->main_fn, SI_PARAM_POS_X_FLOAT); - out[1] = LLVMGetParam(ctx->main_fn, SI_PARAM_POS_Y_FLOAT); - out[2] = LLVMGetParam(ctx->main_fn, SI_PARAM_POS_Z_FLOAT); - out[3] = ac_build_fdiv(&ctx->ac, ctx->ac.f32_1, - LLVMGetParam(ctx->main_fn, SI_PARAM_POS_W_FLOAT)); - return; - } - - si_llvm_load_input_fs(ctx, input_index, out); -} - LLVMValueRef si_nir_lookup_interp_param(struct ac_shader_abi *abi, enum glsl_interp_mode interp, unsigned location) @@ -1067,20 +1110,16 @@ bool si_nir_build_llvm(struct si_shader_context *ctx, struct nir_shader *nir) { struct tgsi_shader_info *info = &ctx->shader->selector->info; - if (nir->info.stage == MESA_SHADER_VERTEX || - nir->info.stage == MESA_SHADER_FRAGMENT) { + if (nir->info.stage == MESA_SHADER_VERTEX) { uint64_t processed_inputs = 0; nir_foreach_variable(variable, &nir->inputs) { unsigned attrib_count = glsl_count_attribute_slots(variable->type, - nir->info.stage == MESA_SHADER_VERTEX); + true); unsigned input_idx = variable->data.driver_location; LLVMValueRef data[4]; unsigned loc = variable->data.location; - if (loc >= VARYING_SLOT_VAR0 && nir->info.stage == MESA_SHADER_FRAGMENT) - ctx->abi.fs_input_attr_indices[loc - VARYING_SLOT_VAR0] = input_idx / 4; - for (unsigned i = 0; i < attrib_count; i++) { /* Packed components share the same location so skip * them if we have already processed the location. @@ -1090,24 +1129,50 @@ bool si_nir_build_llvm(struct si_shader_context *ctx, struct nir_shader *nir) continue; } - if (nir->info.stage == MESA_SHADER_VERTEX) { + declare_nir_input_vs(ctx, variable, input_idx / 4, data); + bitcast_inputs(ctx, data, input_idx); + if (glsl_type_is_dual_slot(variable->type)) { + input_idx += 4; declare_nir_input_vs(ctx, variable, input_idx / 4, data); bitcast_inputs(ctx, data, input_idx); - if (glsl_type_is_dual_slot(variable->type)) { - input_idx += 4; - declare_nir_input_vs(ctx, variable, input_idx / 4, data); - bitcast_inputs(ctx, data, input_idx); - } - } else if (nir->info.stage == MESA_SHADER_FRAGMENT) { - declare_nir_input_fs(ctx, variable, input_idx / 4, data); - bitcast_inputs(ctx, data, input_idx); } processed_inputs |= ((uint64_t)1 << (loc + i)); input_idx += 4; } } - } + } else if (nir->info.stage == MESA_SHADER_FRAGMENT) { + unsigned colors_read = + ctx->shader->selector->info.colors_read; + LLVMValueRef main_fn = ctx->main_fn; + + LLVMValueRef undef = LLVMGetUndef(ctx->f32); + + unsigned offset = SI_PARAM_POS_FIXED_PT + 1; + + if (colors_read & 0x0f) { + unsigned mask = colors_read & 0x0f; + LLVMValueRef values[4]; + values[0] = mask & 0x1 ? LLVMGetParam(main_fn, offset++) : undef; + values[1] = mask & 0x2 ? LLVMGetParam(main_fn, offset++) : undef; + values[2] = mask & 0x4 ? LLVMGetParam(main_fn, offset++) : undef; + values[3] = mask & 0x8 ? LLVMGetParam(main_fn, offset++) : undef; + ctx->abi.color0 = + ac_to_integer(&ctx->ac, + ac_build_gather_values(&ctx->ac, values, 4)); + } + if (colors_read & 0xf0) { + unsigned mask = (colors_read & 0xf0) >> 4; + LLVMValueRef values[4]; + values[0] = mask & 0x1 ? LLVMGetParam(main_fn, offset++) : undef; + values[1] = mask & 0x2 ? LLVMGetParam(main_fn, offset++) : undef; + values[2] = mask & 0x4 ? LLVMGetParam(main_fn, offset++) : undef; + values[3] = mask & 0x8 ? LLVMGetParam(main_fn, offset++) : undef; + ctx->abi.color1 = + ac_to_integer(&ctx->ac, + ac_build_gather_values(&ctx->ac, values, 4)); + } + } ctx->abi.inputs = &ctx->inputs[0]; ctx->abi.load_sampler_desc = si_nir_load_sampler_desc; -- 2.30.2