From: Timur Kristóf Date: Thu, 26 Sep 2019 15:53:17 +0000 (+0200) Subject: aco: Set GFX10 DLC bit properly. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=1de9ef9c96c1933b20ba1877cad799794e10359d;p=mesa.git aco: Set GFX10 DLC bit properly. The DLC bit is now set to 1 for all loads when GLC is also set, but cleared to 0 for all stores (otherwise it causes issues), and also cleared to 0 for atomics. Signed-off-by: Timur Kristóf Reviewed-by: Daniel Schürmann --- diff --git a/src/amd/compiler/README b/src/amd/compiler/README index 01b8339c547..990eb62baec 100644 --- a/src/amd/compiler/README +++ b/src/amd/compiler/README @@ -105,6 +105,10 @@ The recommendation from AMD devs is to always set these two bits at the same tim as it doesn't make too much sense to set them independently, aside from some circumstances (eg. we needn't set DLC when only one shader array is used). +Stores and atomics always bypass the L1 cache, so they don't support the DLC bit, +and it shouldn't be set in these cases. Setting the DLC for these cases can result +in graphical glitches. + # Hardware Bugs ## SMEM corrupts VCCZ on SI/CI diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index d1849d7b92b..6b5111c6f47 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -2983,6 +2983,7 @@ void load_buffer(isel_context *ctx, unsigned num_components, Temp dst, Temp rsrc Builder bld(ctx->program, ctx->block); unsigned num_bytes = dst.size() * 4; + bool dlc = glc && ctx->options->chip_class >= GFX10; aco_opcode op; if (dst.type() == RegType::vgpr || (glc && ctx->options->chip_class < GFX8)) { @@ -3005,6 +3006,7 @@ void load_buffer(isel_context *ctx, unsigned num_components, Temp dst, Temp rsrc mubuf->operands[2] = soffset; mubuf->offen = (offset.type() == RegType::vgpr); mubuf->glc = glc; + mubuf->dlc = dlc; mubuf->barrier = barrier_buffer; bld.insert(std::move(mubuf)); emit_split_vector(ctx, lower, 2); @@ -3034,6 +3036,7 @@ void load_buffer(isel_context *ctx, unsigned num_components, Temp dst, Temp rsrc mubuf->operands[2] = soffset; mubuf->offen = (offset.type() == RegType::vgpr); mubuf->glc = glc; + mubuf->dlc = dlc; mubuf->barrier = barrier_buffer; mubuf->offset = const_offset; aco_ptr instr = std::move(mubuf); @@ -3087,6 +3090,7 @@ void load_buffer(isel_context *ctx, unsigned num_components, Temp dst, Temp rsrc assert(load->operands[1].getTemp().type() == RegType::sgpr); load->definitions[0] = Definition(dst); load->glc = glc; + load->dlc = dlc; load->barrier = barrier_buffer; assert(ctx->options->chip_class >= GFX8 || !glc); @@ -3623,6 +3627,7 @@ static Temp adjust_sample_index_using_fmask(isel_context *ctx, bool da, Temp coo load->operands[1] = Operand(fmask_desc_ptr); load->definitions[0] = Definition(fmask); load->glc = false; + load->dlc = false; load->dmask = 0x1; load->unrm = true; load->da = da; @@ -3832,6 +3837,7 @@ void visit_image_store(isel_context *ctx, nir_intrinsic_instr *instr) store->operands[3] = Operand(data); store->idxen = true; store->glc = glc; + store->dlc = false; store->disable_wqm = true; store->barrier = barrier_image; ctx->program->needs_exact = true; @@ -3849,6 +3855,7 @@ void visit_image_store(isel_context *ctx, nir_intrinsic_instr *instr) store->operands[2] = Operand(s4); store->operands[3] = Operand(data); store->glc = glc; + store->dlc = false; store->dmask = (1 << data.size()) - 1; store->unrm = true; store->da = should_declare_array(ctx, dim, glsl_sampler_type_is_array(type)); @@ -3945,6 +3952,7 @@ void visit_image_atomic(isel_context *ctx, nir_intrinsic_instr *instr) mubuf->offset = 0; mubuf->idxen = true; mubuf->glc = return_previous; + mubuf->dlc = false; /* Not needed for atomics */ mubuf->disable_wqm = true; mubuf->barrier = barrier_image; ctx->program->needs_exact = true; @@ -3962,6 +3970,7 @@ void visit_image_atomic(isel_context *ctx, nir_intrinsic_instr *instr) if (return_previous) mimg->definitions[0] = Definition(dst); mimg->glc = return_previous; + mimg->dlc = false; /* Not needed for atomics */ mimg->dmask = (1 << data.size()) - 1; mimg->unrm = true; mimg->da = should_declare_array(ctx, dim, glsl_sampler_type_is_array(type)); @@ -4178,6 +4187,7 @@ void visit_store_ssbo(isel_context *ctx, nir_intrinsic_instr *instr) store->operands[1].setFixed(m0); store->operands[2] = Operand(write_data); store->glc = nir_intrinsic_access(instr) & (ACCESS_VOLATILE | ACCESS_COHERENT | ACCESS_NON_READABLE); + store->dlc = false; store->disable_wqm = true; store->barrier = barrier_buffer; ctx->block->instructions.emplace_back(std::move(store)); @@ -4195,6 +4205,7 @@ void visit_store_ssbo(isel_context *ctx, nir_intrinsic_instr *instr) store->offset = start * elem_size_bytes; store->offen = (offset.type() == RegType::vgpr); store->glc = nir_intrinsic_access(instr) & (ACCESS_VOLATILE | ACCESS_COHERENT | ACCESS_NON_READABLE); + store->dlc = false; store->disable_wqm = true; store->barrier = barrier_buffer; ctx->program->needs_exact = true; @@ -4290,6 +4301,7 @@ void visit_atomic_ssbo(isel_context *ctx, nir_intrinsic_instr *instr) mubuf->offset = 0; mubuf->offen = (offset.type() == RegType::vgpr); mubuf->glc = return_previous; + mubuf->dlc = false; /* Not needed for atomics */ mubuf->disable_wqm = true; mubuf->barrier = barrier_buffer; ctx->program->needs_exact = true; @@ -4314,6 +4326,7 @@ void visit_load_global(isel_context *ctx, nir_intrinsic_instr *instr) Temp addr = get_ssa_temp(ctx, instr->src[0].ssa); bool glc = nir_intrinsic_access(instr) & (ACCESS_VOLATILE | ACCESS_COHERENT); + bool dlc = glc && ctx->options->chip_class >= GFX10; aco_opcode op; if (dst.type() == RegType::vgpr || (glc && ctx->options->chip_class < GFX8)) { bool global = ctx->options->chip_class >= GFX9; @@ -4338,6 +4351,7 @@ void visit_load_global(isel_context *ctx, nir_intrinsic_instr *instr) flat->operands[0] = Operand(addr); flat->operands[1] = Operand(s1); flat->glc = glc; + flat->dlc = dlc; if (dst.type() == RegType::sgpr) { Temp vec = bld.tmp(RegType::vgpr, dst.size()); @@ -4369,6 +4383,7 @@ void visit_load_global(isel_context *ctx, nir_intrinsic_instr *instr) load->operands[1] = Operand(0u); load->definitions[0] = Definition(dst); load->glc = glc; + load->dlc = dlc; load->barrier = barrier_buffer; assert(ctx->options->chip_class >= GFX8 || !glc); @@ -4455,6 +4470,7 @@ void visit_store_global(isel_context *ctx, nir_intrinsic_instr *instr) flat->operands[1] = Operand(s1); flat->operands[2] = Operand(data); flat->glc = glc; + flat->dlc = false; flat->offset = offset; ctx->block->instructions.emplace_back(std::move(flat)); } @@ -7436,6 +7452,7 @@ static void emit_stream_output(isel_context *ctx, } store->offen = true; store->glc = true; + store->dlc = false; store->slc = true; store->can_reorder = true; ctx->block->instructions.emplace_back(std::move(store));