X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Famd%2Fvulkan%2Fradv_nir_to_llvm.c;h=db21ad809b784e424da9b5839300c2979ce65210;hb=2182bbf84f0f19846a47f0438ec702f4d862731e;hp=d833bc2477dc5b945d5f28f8742898782cfe241b;hpb=2d295ab3f35acd796826d6f06f798d8618b1d814;p=mesa.git diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c index d833bc2477d..db21ad809b7 100644 --- a/src/amd/vulkan/radv_nir_to_llvm.c +++ b/src/amd/vulkan/radv_nir_to_llvm.c @@ -589,11 +589,12 @@ store_tcs_output(struct ac_shader_abi *abi, LLVMValueRef param_index, unsigned const_index, LLVMValueRef src, - unsigned writemask) + unsigned writemask, + unsigned component, + unsigned driver_location) { struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); const unsigned location = var->data.location; - unsigned component = var->data.location_frac; const bool is_patch = var->data.patch; const bool is_compact = var->data.compact; LLVMValueRef dw_addr; @@ -876,39 +877,21 @@ static LLVMValueRef load_sample_mask_in(struct ac_shader_abi *abi) static void gfx10_ngg_gs_emit_vertex(struct radv_shader_context *ctx, unsigned stream, + LLVMValueRef vertexidx, LLVMValueRef *addrs); static void -visit_emit_vertex(struct ac_shader_abi *abi, unsigned stream, LLVMValueRef *addrs) +visit_emit_vertex_with_counter(struct ac_shader_abi *abi, unsigned stream, + LLVMValueRef vertexidx, LLVMValueRef *addrs) { - LLVMValueRef gs_next_vertex; - LLVMValueRef can_emit; unsigned offset = 0; struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); if (ctx->args->options->key.vs_common_out.as_ngg) { - gfx10_ngg_gs_emit_vertex(ctx, stream, addrs); + gfx10_ngg_gs_emit_vertex(ctx, stream, vertexidx, addrs); return; } - /* Write vertex attribute values to GSVS ring */ - gs_next_vertex = LLVMBuildLoad(ctx->ac.builder, - ctx->gs_next_vertex[stream], - ""); - - /* If this thread has already emitted the declared maximum number of - * vertices, don't emit any more: excessive vertex emissions are not - * supposed to have any effect. - */ - can_emit = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, gs_next_vertex, - LLVMConstInt(ctx->ac.i32, ctx->shader->info.gs.vertices_out, false), ""); - - bool use_kill = !ctx->args->shader_info->gs.writes_memory; - if (use_kill) - ac_build_kill_if_false(&ctx->ac, can_emit); - else - ac_build_ifcc(&ctx->ac, can_emit, 6505); - for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) { unsigned output_usage_mask = ctx->args->shader_info->gs.output_usage_mask[i]; @@ -933,7 +916,7 @@ visit_emit_vertex(struct ac_shader_abi *abi, unsigned stream, LLVMValueRef *addr offset++; - voffset = LLVMBuildAdd(ctx->ac.builder, voffset, gs_next_vertex, ""); + voffset = LLVMBuildAdd(ctx->ac.builder, voffset, vertexidx, ""); voffset = LLVMBuildMul(ctx->ac.builder, voffset, LLVMConstInt(ctx->ac.i32, 4, false), ""); out_val = ac_to_integer(&ctx->ac, out_val); @@ -949,16 +932,9 @@ visit_emit_vertex(struct ac_shader_abi *abi, unsigned stream, LLVMValueRef *addr } } - gs_next_vertex = LLVMBuildAdd(ctx->ac.builder, gs_next_vertex, - ctx->ac.i32_1, ""); - LLVMBuildStore(ctx->ac.builder, gs_next_vertex, ctx->gs_next_vertex[stream]); - ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_EMIT | AC_SENDMSG_GS | (stream << 8), ctx->gs_wave_id); - - if (!use_kill) - ac_build_endif(&ctx->ac, 6505); } static void @@ -1296,7 +1272,7 @@ handle_vs_input_decl(struct radv_shader_context *ctx, * access are detected. Only GFX6 and GFX10 are affected. */ bool unaligned_vertex_fetches = false; - if ((ctx->ac.chip_class == GFX6 || ctx->ac.chip_class == GFX10) && + if ((ctx->ac.chip_class == GFX6 || ctx->ac.chip_class >= GFX10) && vtx_info->chan_format != data_format && ((attrib_offset % vtx_info->element_size) || (attrib_stride % vtx_info->element_size))) @@ -1307,7 +1283,7 @@ handle_vs_input_decl(struct radv_shader_context *ctx, LLVMValueRef values[4]; assert(ctx->ac.chip_class == GFX6 || - ctx->ac.chip_class == GFX10); + ctx->ac.chip_class >= GFX10); for (unsigned chan = 0; chan < num_channels; chan++) { unsigned chan_offset = attrib_offset + chan * vtx_info->chan_byte_size; @@ -1392,7 +1368,7 @@ handle_vs_input_decl(struct radv_shader_context *ctx, static void handle_vs_inputs(struct radv_shader_context *ctx, struct nir_shader *nir) { - nir_foreach_variable(variable, &nir->inputs) + nir_foreach_shader_in_variable(variable, nir) handle_vs_input_decl(ctx, variable); } @@ -1402,7 +1378,7 @@ prepare_interp_optimize(struct radv_shader_context *ctx, { bool uses_center = false; bool uses_centroid = false; - nir_foreach_variable(variable, &nir->inputs) { + nir_foreach_shader_in_variable(variable, nir) { if (glsl_get_base_type(glsl_without_array(variable->type)) != GLSL_TYPE_FLOAT || variable->data.sample) continue; @@ -1578,6 +1554,30 @@ si_llvm_init_export_args(struct radv_shader_context *ctx, break; } + /* Replace NaN by zero (only 32-bit) to fix game bugs if + * requested. + */ + if (ctx->args->options->enable_mrt_output_nan_fixup && + !is_16bit && + (col_format == V_028714_SPI_SHADER_32_R || + col_format == V_028714_SPI_SHADER_32_GR || + col_format == V_028714_SPI_SHADER_32_AR || + col_format == V_028714_SPI_SHADER_32_ABGR || + col_format == V_028714_SPI_SHADER_FP16_ABGR)) { + for (unsigned i = 0; i < 4; i++) { + LLVMValueRef args[2] = { + values[i], + LLVMConstInt(ctx->ac.i32, S_NAN | Q_NAN, false) + }; + LLVMValueRef isnan = + ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.class.f32", ctx->ac.i1, + args, 2, AC_FUNC_ATTR_READNONE); + values[i] = LLVMBuildSelect(ctx->ac.builder, isnan, + ctx->ac.f32_0, + values[i], ""); + } + } + /* Pack f16 or norm_i16/u16. */ if (packf) { for (chan = 0; chan < 2; chan++) { @@ -1797,6 +1797,7 @@ radv_build_param_exports(struct radv_shader_context *ctx, if (slot_name != VARYING_SLOT_LAYER && slot_name != VARYING_SLOT_PRIMITIVE_ID && + slot_name != VARYING_SLOT_VIEWPORT && slot_name != VARYING_SLOT_CLIP_DIST0 && slot_name != VARYING_SLOT_CLIP_DIST1 && slot_name < VARYING_SLOT_VAR0) @@ -1916,12 +1917,10 @@ radv_llvm_export_vs(struct radv_shader_context *ctx, outinfo->pos_exports++; } - /* Navi10-14 skip POS0 exports if EXEC=0 and DONE=0, causing a hang. + /* GFX10 skip POS0 exports if EXEC=0 and DONE=0, causing a hang. * Setting valid_mask=1 prevents it and has no other effect. */ - if (ctx->ac.family == CHIP_NAVI10 || - ctx->ac.family == CHIP_NAVI12 || - ctx->ac.family == CHIP_NAVI14) + if (ctx->ac.chip_class == GFX10) pos_args[0].valid_mask = 1; pos_idx = 0; @@ -2318,8 +2317,7 @@ static void build_streamout_vertex(struct radv_shader_context *ctx, for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) { unsigned output_usage_mask = ctx->args->shader_info->gs.output_usage_mask[i]; - uint8_t output_stream = - output_stream = ctx->args->shader_info->gs.output_streams[i]; + uint8_t output_stream = ctx->args->shader_info->gs.output_streams[i]; if (!(ctx->output_mask & (1ull << i)) || output_stream != stream) @@ -3309,25 +3307,11 @@ static void gfx10_ngg_gs_emit_epilogue_2(struct radv_shader_context *ctx) static void gfx10_ngg_gs_emit_vertex(struct radv_shader_context *ctx, unsigned stream, + LLVMValueRef vertexidx, LLVMValueRef *addrs) { LLVMBuilderRef builder = ctx->ac.builder; LLVMValueRef tmp; - const LLVMValueRef vertexidx = - LLVMBuildLoad(builder, ctx->gs_next_vertex[stream], ""); - - /* If this thread has already emitted the declared maximum number of - * vertices, skip the write: excessive vertex emissions are not - * supposed to have any effect. - */ - const LLVMValueRef can_emit = - LLVMBuildICmp(builder, LLVMIntULT, vertexidx, - LLVMConstInt(ctx->ac.i32, ctx->shader->info.gs.vertices_out, false), ""); - ac_build_ifcc(&ctx->ac, can_emit, 9001); - - tmp = LLVMBuildAdd(builder, vertexidx, ctx->ac.i32_1, ""); - tmp = LLVMBuildSelect(builder, can_emit, tmp, vertexidx, ""); - LLVMBuildStore(builder, tmp, ctx->gs_next_vertex[stream]); const LLVMValueRef vertexptr = ngg_gs_emit_vertex_ptr(ctx, get_thread_id_in_tg(ctx), vertexidx); @@ -3359,6 +3343,13 @@ static void gfx10_ngg_gs_emit_vertex(struct radv_shader_context *ctx, } assert(out_idx * 4 <= ctx->args->shader_info->gs.gsvs_vertex_size); + /* Store the current number of emitted vertices to zero out remaining + * primitive flags in case the geometry shader doesn't emit the maximum + * number of vertices. + */ + tmp = LLVMBuildAdd(builder, vertexidx, ctx->ac.i32_1, ""); + LLVMBuildStore(builder, tmp, ctx->gs_next_vertex[stream]); + /* Determine and store whether this vertex completed a primitive. */ const LLVMValueRef curverts = LLVMBuildLoad(builder, ctx->gs_curprim_verts[stream], ""); @@ -3395,8 +3386,6 @@ static void gfx10_ngg_gs_emit_vertex(struct radv_shader_context *ctx, tmp = LLVMBuildLoad(builder, ctx->gs_generated_prims[stream], ""); tmp = LLVMBuildAdd(builder, tmp, LLVMBuildZExt(builder, iscompleteprim, ctx->ac.i32, ""), ""); LLVMBuildStore(builder, tmp, ctx->gs_generated_prims[stream]); - - ac_build_endif(&ctx->ac, 9001); } static void @@ -3738,7 +3727,7 @@ ac_nir_eliminate_const_vs_outputs(struct radv_shader_context *ctx) ac_optimize_vs_outputs(&ctx->ac, ctx->main_function, outinfo->vs_output_param_offset, - VARYING_SLOT_MAX, + VARYING_SLOT_MAX, 0, &outinfo->param_exports); } @@ -3747,7 +3736,7 @@ ac_setup_rings(struct radv_shader_context *ctx) { if (ctx->args->options->chip_class <= GFX8 && (ctx->stage == MESA_SHADER_GEOMETRY || - ctx->args->options->key.vs_common_out.as_es || ctx->args->options->key.vs_common_out.as_es)) { + ctx->args->options->key.vs_common_out.as_es)) { unsigned ring = ctx->stage == MESA_SHADER_GEOMETRY ? RING_ESGS_GS : RING_ESGS_VS; LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, ring, false); @@ -3925,7 +3914,8 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm, ac_llvm_context_init(&ctx.ac, ac_llvm, args->options->chip_class, args->options->family, float_mode, - args->shader_info->wave_size, 64); + args->shader_info->wave_size, + args->shader_info->ballot_bit_size); ctx.context = ctx.ac.context; ctx.max_workgroup_size = 0; @@ -3947,7 +3937,7 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm, ctx.abi.inputs = &ctx.inputs[0]; ctx.abi.emit_outputs = handle_shader_outputs_post; - ctx.abi.emit_vertex = visit_emit_vertex; + ctx.abi.emit_vertex_with_counter = visit_emit_vertex_with_counter; ctx.abi.load_ubo = radv_load_ubo; ctx.abi.load_ssbo = radv_load_ssbo; ctx.abi.load_sampler_desc = radv_get_sampler_desc; @@ -4038,13 +4028,15 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm, ctx.tcs_num_inputs = args->options->key.tcs.num_inputs; else ctx.tcs_num_inputs = util_last_bit64(args->shader_info->vs.ls_outputs_written); + unsigned tcs_num_outputs = util_last_bit64(ctx.args->shader_info->tcs.outputs_written); + unsigned tcs_num_patch_outputs = util_last_bit64(ctx.args->shader_info->tcs.patch_outputs_written); ctx.tcs_num_patches = get_tcs_num_patches( ctx.args->options->key.tcs.input_vertices, ctx.shader->info.tess.tcs_vertices_out, ctx.tcs_num_inputs, - ctx.args->shader_info->tcs.outputs_written, - ctx.args->shader_info->tcs.patch_outputs_written, + tcs_num_outputs, + tcs_num_patch_outputs, ctx.args->options->tess_offchip_block_dw_size, ctx.args->options->chip_class, ctx.args->options->family); @@ -4099,7 +4091,7 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm, ac_emit_barrier(&ctx.ac, ctx.stage); } - nir_foreach_variable(variable, &shaders[i]->outputs) + nir_foreach_shader_out_variable(variable, shaders[i]) scan_shader_output_decl(&ctx, variable, shaders[i], shaders[i]->info.stage); ac_setup_rings(&ctx); @@ -4148,15 +4140,18 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm, } if (shaders[i]->info.stage == MESA_SHADER_TESS_CTRL) { + unsigned tcs_num_outputs = util_last_bit64(ctx.args->shader_info->tcs.outputs_written); + unsigned tcs_num_patch_outputs = util_last_bit64(ctx.args->shader_info->tcs.patch_outputs_written); args->shader_info->tcs.num_patches = ctx.tcs_num_patches; - args->shader_info->tcs.lds_size = + args->shader_info->tcs.num_lds_blocks = calculate_tess_lds_size( + ctx.args->options->chip_class, ctx.args->options->key.tcs.input_vertices, ctx.shader->info.tess.tcs_vertices_out, ctx.tcs_num_inputs, ctx.tcs_num_patches, - ctx.args->shader_info->tcs.outputs_written, - ctx.args->shader_info->tcs.patch_outputs_written); + tcs_num_outputs, + tcs_num_patch_outputs); } } @@ -4415,7 +4410,7 @@ radv_compile_gs_copy_shader(struct ac_llvm_compiler *ac_llvm, ac_setup_rings(&ctx); - nir_foreach_variable(variable, &geom_shader->outputs) { + nir_foreach_shader_out_variable(variable, geom_shader) { scan_shader_output_decl(&ctx, variable, geom_shader, MESA_SHADER_VERTEX); ac_handle_shader_output_decl(&ctx.ac, &ctx.abi, geom_shader, variable, MESA_SHADER_VERTEX); @@ -4447,8 +4442,6 @@ llvm_compile_shader(struct radv_device *device, tm_options |= AC_TM_SUPPORTS_SPILL; if (args->options->check_ir) tm_options |= AC_TM_CHECK_IR; - if (device->instance->debug_flags & RADV_DEBUG_NO_LOAD_STORE_OPT) - tm_options |= AC_TM_NO_LOAD_STORE_OPT; thread_compiler = !(device->instance->debug_flags & RADV_DEBUG_NOTHREADLLVM);