From: Connor Abbott
Date: Fri, 10 May 2019 08:44:20 +0000 (+0200)
Subject: radv: Use NIR barycentric intrinsics
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=118a66df9907772bb9e5503b736c95d7bb62d52c;p=mesa.git

radv: Use NIR barycentric intrinsics

We have to add a few lowering passes to deal with things that used to be
dealt with inline when creating inputs. We also move the code that fills
out the radv_shader_variant_info struct for linking purposes to
radv_shader.c, as it's no longer tied to the NIR->LLVM lowering.

Reviewed-by: Bas Nieuwenhuizen
---
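
For reference, a rough sketch of what a backend has to do per channel once
fragment shader inputs arrive as load_barycentric_* plus
load_interpolated_input instead of being interpolated while creating the
inputs. It only uses helpers already visible in this patch; the function name
and parameters are illustrative and not part of this commit:

static LLVMValueRef
emit_interp_load_sketch(struct radv_shader_context *ctx,
                        LLVMValueRef interp_param, /* (i, j) from a load_barycentric_* intrinsic */
                        unsigned attr, unsigned chan)
{
        LLVMValueRef i, j;

        /* Split the barycentric pair into its two coordinates. */
        interp_param = LLVMBuildBitCast(ctx->ac.builder, interp_param,
                                        ctx->ac.v2f32, "");
        i = LLVMBuildExtractElement(ctx->ac.builder, interp_param, ctx->ac.i32_0, "");
        j = LLVMBuildExtractElement(ctx->ac.builder, interp_param, ctx->ac.i32_1, "");

        /* Interpolate channel 'chan' of attribute 'attr' for this fragment. */
        return ac_build_fs_interp(&ctx->ac,
                                  LLVMConstInt(ctx->ac.i32, chan, false),
                                  LLVMConstInt(ctx->ac.i32, attr, false),
                                  ctx->abi.prim_mask, i, j);
}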

diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c
index aabb297ca21..5f41a8f5843 100644
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -2311,131 +2311,6 @@ handle_vs_input_decl(struct radv_shader_context *ctx,
         }
 }
 
-static void interp_fs_input(struct radv_shader_context *ctx,
-                            unsigned attr,
-                            LLVMValueRef interp_param,
-                            LLVMValueRef prim_mask,
-                            bool float16,
-                            LLVMValueRef result[4])
-{
-        LLVMValueRef attr_number;
-        unsigned chan;
-        LLVMValueRef i, j;
-        bool interp = !LLVMIsUndef(interp_param);
-
-        attr_number = LLVMConstInt(ctx->ac.i32, attr, false);
-
-        /* fs.constant returns the param from the middle vertex, so it's not
-         * really useful for flat shading. It's meant to be used for custom
-         * interpolation (but the intrinsic can't fetch from the other two
-         * vertices).
-         *
-         * Luckily, it doesn't matter, because we rely on the FLAT_SHADE state
-         * to do the right thing. The only reason we use fs.constant is that
-         * fs.interp cannot be used on integers, because they can be equal
-         * to NaN.
-         */
-        if (interp) {
-                interp_param = LLVMBuildBitCast(ctx->ac.builder, interp_param,
-                                                ctx->ac.v2f32, "");
-
-                i = LLVMBuildExtractElement(ctx->ac.builder, interp_param,
-                                            ctx->ac.i32_0, "");
-                j = LLVMBuildExtractElement(ctx->ac.builder, interp_param,
-                                            ctx->ac.i32_1, "");
-        }
-
-        for (chan = 0; chan < 4; chan++) {
-                LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, chan, false);
-
-                if (interp && float16) {
-                        result[chan] = ac_build_fs_interp_f16(&ctx->ac,
-                                                              llvm_chan,
-                                                              attr_number,
-                                                              prim_mask, i, j);
-                } else if (interp) {
-                        result[chan] = ac_build_fs_interp(&ctx->ac,
-                                                          llvm_chan,
-                                                          attr_number,
-                                                          prim_mask, i, j);
-                } else {
-                        result[chan] = ac_build_fs_interp_mov(&ctx->ac,
-                                                              LLVMConstInt(ctx->ac.i32, 2, false),
-                                                              llvm_chan,
-                                                              attr_number,
-                                                              prim_mask);
-                        result[chan] = LLVMBuildBitCast(ctx->ac.builder, result[chan], ctx->ac.i32, "");
-                        result[chan] = LLVMBuildTruncOrBitCast(ctx->ac.builder, result[chan], float16 ? ctx->ac.i16 : ctx->ac.i32, "");
-                }
-        }
-}
-
-static void mark_16bit_fs_input(struct radv_shader_context *ctx,
-                                const struct glsl_type *type,
-                                int location)
-{
-        if (glsl_type_is_scalar(type) || glsl_type_is_vector(type) || glsl_type_is_matrix(type)) {
-                unsigned attrib_count = glsl_count_attribute_slots(type, false);
-                if (glsl_type_is_16bit(type)) {
-                        ctx->float16_shaded_mask |= ((1ull << attrib_count) - 1) << location;
-                }
-        } else if (glsl_type_is_array(type)) {
-                unsigned stride = glsl_count_attribute_slots(glsl_get_array_element(type), false);
-                for (unsigned i = 0; i < glsl_get_length(type); ++i) {
-                        mark_16bit_fs_input(ctx, glsl_get_array_element(type), location + i * stride);
-                }
-        } else {
-                assert(glsl_type_is_struct_or_ifc(type));
-                for (unsigned i = 0; i < glsl_get_length(type); i++) {
-                        mark_16bit_fs_input(ctx, glsl_get_struct_field(type, i), location);
-                        location += glsl_count_attribute_slots(glsl_get_struct_field(type, i), false);
-                }
-        }
-}
-
-static void
-handle_fs_input_decl(struct radv_shader_context *ctx,
-                     struct nir_variable *variable)
-{
-        int idx = variable->data.location;
-        unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
-        LLVMValueRef interp = NULL;
-        uint64_t mask;
-
-        variable->data.driver_location = idx * 4;
-
-
-        if (variable->data.compact) {
-                unsigned component_count = variable->data.location_frac +
-                                           glsl_get_length(variable->type);
-                attrib_count = (component_count + 3) / 4;
-        } else
-                mark_16bit_fs_input(ctx, variable->type, idx);
-
-        mask = ((1ull << attrib_count) - 1) << variable->data.location;
-
-        if (glsl_get_base_type(glsl_without_array(variable->type)) == GLSL_TYPE_FLOAT ||
-            glsl_get_base_type(glsl_without_array(variable->type)) == GLSL_TYPE_FLOAT16 ||
-            glsl_get_base_type(glsl_without_array(variable->type)) == GLSL_TYPE_STRUCT) {
-                unsigned interp_type;
-                if (variable->data.sample)
-                        interp_type = INTERP_SAMPLE;
-                else if (variable->data.centroid)
-                        interp_type = INTERP_CENTROID;
-                else
-                        interp_type = INTERP_CENTER;
-
-                interp = lookup_interp_param(&ctx->abi, variable->data.interpolation, interp_type);
-        }
-        if (interp == NULL)
-                interp = LLVMGetUndef(ctx->ac.i32);
-
-        for (unsigned i = 0; i < attrib_count; ++i)
-                ctx->inputs[ac_llvm_reg_index_soa(idx + i, 0)] = interp;
-
-        ctx->input_mask |= mask;
-}
-
 static void
 handle_vs_inputs(struct radv_shader_context *ctx,
                  struct nir_shader *nir) {
@@ -2467,64 +2342,6 @@ prepare_interp_optimize(struct radv_shader_context *ctx,
         }
 }
 
-static void
-handle_fs_inputs(struct radv_shader_context *ctx,
-                 struct nir_shader *nir)
-{
-        prepare_interp_optimize(ctx, nir);
-
-        nir_foreach_variable(variable, &nir->inputs)
-                handle_fs_input_decl(ctx, variable);
-
-        unsigned index = 0;
-
-        if (ctx->shader_info->info.needs_multiview_view_index ||
-            ctx->shader_info->info.ps.layer_input) {
-                ctx->input_mask |= 1ull << VARYING_SLOT_LAYER;
-                ctx->inputs[ac_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)] = LLVMGetUndef(ctx->ac.i32);
-        }
-
-        for (unsigned i = 0; i < RADEON_LLVM_MAX_INPUTS; ++i) {
-                LLVMValueRef interp_param;
-                LLVMValueRef *inputs = ctx->inputs +ac_llvm_reg_index_soa(i, 0);
-
-                if (!(ctx->input_mask & (1ull << i)))
-                        continue;
-
-                if (i >= VARYING_SLOT_VAR0 || i == VARYING_SLOT_PNTC ||
-                    i == VARYING_SLOT_PRIMITIVE_ID || i == VARYING_SLOT_LAYER) {
-                        interp_param = *inputs;
-                        bool float16 = (ctx->float16_shaded_mask >> i) & 1;
-                        interp_fs_input(ctx, index, interp_param, ctx->abi.prim_mask, float16,
-                                        inputs);
-
-                        if (LLVMIsUndef(interp_param))
-                                ctx->shader_info->fs.flat_shaded_mask |= 1u << index;
-                        if (float16)
-                                ctx->shader_info->fs.float16_shaded_mask |= 1u << index;
-                        if (i >= VARYING_SLOT_VAR0)
-                                ctx->abi.fs_input_attr_indices[i - VARYING_SLOT_VAR0] = index;
-                        ++index;
-                } else if (i == VARYING_SLOT_CLIP_DIST0) {
-                        int length = ctx->shader_info->info.ps.num_input_clips_culls;
-
-                        for (unsigned j = 0; j < length; j += 4) {
-                                inputs = ctx->inputs + ac_llvm_reg_index_soa(i, j);
-
-                                interp_param = *inputs;
-                                interp_fs_input(ctx, index, interp_param,
-                                                ctx->abi.prim_mask, false, inputs);
-                                ++index;
-                        }
-                }
-        }
-        ctx->shader_info->fs.num_interp = index;
-        ctx->shader_info->fs.input_mask = ctx->input_mask >> VARYING_SLOT_VAR0;
-
-        if (ctx->shader_info->info.needs_multiview_view_index)
-                ctx->abi.view_index = ctx->inputs[ac_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)];
-}
-
 static void
 scan_shader_output_decl(struct radv_shader_context *ctx,
                         struct nir_variable *variable,
@@ -3877,8 +3694,6 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
 
         ctx.ac.builder = ac_create_builder(ctx.context, float_mode);
 
-        memset(shader_info, 0, sizeof(*shader_info));
-
         radv_nir_shader_info_init(&shader_info->info);
 
         for(int i = 0; i < shader_count; ++i)
@@ -4004,7 +3819,7 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
                 }
 
                 if (shaders[i]->info.stage == MESA_SHADER_FRAGMENT)
-                        handle_fs_inputs(&ctx, shaders[i]);
+                        prepare_interp_optimize(&ctx, shaders[i]);
                 else if(shaders[i]->info.stage == MESA_SHADER_VERTEX)
                         handle_vs_inputs(&ctx, shaders[i]);
                 else if(shader_count >= 2 && shaders[i]->info.stage == MESA_SHADER_GEOMETRY)
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index 839166992d1..8f42514feb9 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -76,7 +76,8 @@ static const struct nir_shader_compiler_options nir_options = {
         .lower_fpow = true,
         .lower_mul_2x32_64 = true,
         .lower_rotate = true,
-        .max_unroll_iterations = 32
+        .max_unroll_iterations = 32,
+        .use_interpolated_input_intrinsics = true,
 };
 
 VkResult radv_CreateShaderModule(
@@ -361,11 +362,11 @@ radv_shader_compile_to_nir(struct radv_device *device,
         nir_lower_vars_to_ssa(nir);
 
         if (nir->info.stage == MESA_SHADER_VERTEX ||
-            nir->info.stage == MESA_SHADER_GEOMETRY) {
+            nir->info.stage == MESA_SHADER_GEOMETRY ||
+            nir->info.stage == MESA_SHADER_FRAGMENT) {
                 NIR_PASS_V(nir, nir_lower_io_to_temporaries,
                            nir_shader_get_entrypoint(nir), true, true);
-        } else if (nir->info.stage == MESA_SHADER_TESS_EVAL||
-                   nir->info.stage == MESA_SHADER_FRAGMENT) {
+        } else if (nir->info.stage == MESA_SHADER_TESS_EVAL) {
                 NIR_PASS_V(nir, nir_lower_io_to_temporaries,
                            nir_shader_get_entrypoint(nir), true, false);
         }
@@ -405,6 +406,152 @@ radv_shader_compile_to_nir(struct radv_device *device,
         return nir;
 }
 
+static void mark_16bit_fs_input(struct radv_shader_variant_info *shader_info,
+                                const struct glsl_type *type,
+                                int location)
+{
+        if (glsl_type_is_scalar(type) || glsl_type_is_vector(type) || glsl_type_is_matrix(type)) {
+                unsigned attrib_count = glsl_count_attribute_slots(type, false);
+                if (glsl_type_is_16bit(type)) {
+                        shader_info->fs.float16_shaded_mask |= ((1ull << attrib_count) - 1) << location;
+                }
+        } else if (glsl_type_is_array(type)) {
+                unsigned stride = glsl_count_attribute_slots(glsl_get_array_element(type), false);
+                for (unsigned i = 0; i < glsl_get_length(type); ++i) {
+                        mark_16bit_fs_input(shader_info, glsl_get_array_element(type), location + i * stride);
+                }
+        } else {
+                assert(glsl_type_is_struct_or_ifc(type));
+                for (unsigned i = 0; i < glsl_get_length(type); i++) {
+                        mark_16bit_fs_input(shader_info, glsl_get_struct_field(type, i), location);
+                        location += glsl_count_attribute_slots(glsl_get_struct_field(type, i), false);
+                }
+        }
+}
+
+static void
+handle_fs_input_decl(struct radv_shader_variant_info *shader_info,
+                     struct nir_variable *variable)
+{
+        unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
+
+        if (variable->data.compact) {
+                unsigned component_count = variable->data.location_frac +
+                                           glsl_get_length(variable->type);
+                attrib_count = (component_count + 3) / 4;
+        } else {
+                mark_16bit_fs_input(shader_info, variable->type,
+                                    variable->data.driver_location);
+        }
+
+        uint64_t mask = ((1ull << attrib_count) - 1);
+
+        if (variable->data.interpolation == INTERP_MODE_FLAT)
+                shader_info->fs.flat_shaded_mask |= mask << variable->data.driver_location;
+
+        if (variable->data.location >= VARYING_SLOT_VAR0)
+                shader_info->fs.input_mask |= mask << (variable->data.location - VARYING_SLOT_VAR0);
+}
+
+static int
+type_size_vec4(const struct glsl_type *type, bool bindless)
+{
+        return glsl_count_attribute_slots(type, false);
+}
+
+static nir_variable *
+find_layer_in_var(nir_shader *nir)
+{
+        nir_foreach_variable(var, &nir->inputs) {
+                if (var->data.location == VARYING_SLOT_LAYER) {
+                        return var;
+                }
+        }
+
+        nir_variable *var =
+                nir_variable_create(nir, nir_var_shader_in, glsl_int_type(), "layer id");
+        var->data.location = VARYING_SLOT_LAYER;
+        var->data.interpolation = INTERP_MODE_FLAT;
+        return var;
+}
+
+/* We use layered rendering to implement multiview, which means we need to map
+ * view_index to gl_Layer. The attachment lowering also needs to know the
+ * layer so that it can sample from the correct layer. The code generates a
+ * load from the layer_id sysval, but since we don't have a way to get at this
+ * information from the fragment shader, we also need to lower this to the
+ * gl_Layer varying. This pass lowers both to a varying load from the LAYER
+ * slot, before lowering io, so that nir_assign_var_locations() will give the
+ * LAYER varying the correct driver_location.
+ */
+
+static bool
+lower_view_index(nir_shader *nir)
+{
+        bool progress = false;
+        nir_function_impl *entry = nir_shader_get_entrypoint(nir);
+        nir_builder b;
+        nir_builder_init(&b, entry);
+
+        nir_variable *layer = NULL;
+        nir_foreach_block(block, entry) {
+                nir_foreach_instr_safe(instr, block) {
+                        if (instr->type != nir_instr_type_intrinsic)
+                                continue;
+
+                        nir_intrinsic_instr *load = nir_instr_as_intrinsic(instr);
+                        if (load->intrinsic != nir_intrinsic_load_view_index &&
+                            load->intrinsic != nir_intrinsic_load_layer_id)
+                                continue;
+
+                        if (!layer)
+                                layer = find_layer_in_var(nir);
+
+                        b.cursor = nir_before_instr(instr);
+                        nir_ssa_def *def = nir_load_var(&b, layer);
+                        nir_ssa_def_rewrite_uses(&load->dest.ssa,
+                                                 nir_src_for_ssa(def));
+
+                        nir_instr_remove(instr);
+                        progress = true;
+                }
+        }
+
+        return progress;
+}
+
+/* Gather information needed to setup the vs<->ps linking registers in
+ * radv_pipeline_generate_ps_inputs().
+ */
+
+static void
+handle_fs_inputs(nir_shader *nir, struct radv_shader_variant_info *shader_info)
+{
+        shader_info->fs.num_interp = nir->num_inputs;
+
+        nir_foreach_variable(variable, &nir->inputs)
+                handle_fs_input_decl(shader_info, variable);
+}
+
+static void
+lower_fs_io(nir_shader *nir, struct radv_shader_variant_info *shader_info)
+{
+        NIR_PASS_V(nir, lower_view_index);
+        nir_assign_io_var_locations(&nir->inputs, &nir->num_inputs,
+                                    MESA_SHADER_FRAGMENT);
+
+        handle_fs_inputs(nir, shader_info);
+
+        NIR_PASS_V(nir, nir_lower_io, nir_var_shader_in, type_size_vec4, 0);
+
+        /* This pass needs actual constants */
+        nir_opt_constant_folding(nir);
+
+        NIR_PASS_V(nir, nir_io_add_const_offset_to_base, nir_var_shader_in);
+        radv_optimize_nir(nir, false, false);
+}
+
+
 void *
 radv_alloc_shader_memory(struct radv_device *device,
                          struct radv_shader_variant *shader)
@@ -853,6 +1000,9 @@ shader_variant_compile(struct radv_device *device,
         struct radv_shader_variant_info variant_info = {0};
         bool thread_compiler;
 
+        if (shaders[0]->info.stage == MESA_SHADER_FRAGMENT)
+                lower_fs_io(shaders[0], &variant_info);
+
         options->family = chip_family;
         options->chip_class = device->physical_device->rad_info.chip_class;
         options->dump_shader = radv_can_dump_shader(device, module, gs_copy_shader);
diff --git a/src/amd/vulkan/radv_shader_info.c b/src/amd/vulkan/radv_shader_info.c
index 8653ffb18a1..d813c33e1c8 100644
--- a/src/amd/vulkan/radv_shader_info.c
+++ b/src/amd/vulkan/radv_shader_info.c
@@ -224,7 +224,7 @@ gather_intrinsic_info(const nir_shader *nir, const nir_intrinsic_instr *instr,
                       struct radv_shader_info *info)
 {
         switch (instr->intrinsic) {
-        case nir_intrinsic_interp_deref_at_sample:
+        case nir_intrinsic_load_barycentric_at_sample:
                 info->ps.needs_sample_positions = true;
                 break;
         case nir_intrinsic_load_draw_id: