From f671cc4d95eaf9ecfaafb216afeff7dc89f66cbf Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Fri, 22 Nov 2019 17:41:22 -0500 Subject: [PATCH] ac: set swizzled bit in cache policy as a hint not to merge loads/stores LLVM now merges loads and stores for all opcodes, so this must be set. Reviewed-by: Samuel Pitoiset --- src/amd/llvm/ac_llvm_build.c | 11 +++----- src/amd/llvm/ac_llvm_build.h | 4 +-- src/amd/llvm/ac_nir_to_llvm.c | 2 +- src/amd/vulkan/radv_nir_to_llvm.c | 20 +++++++------- .../radeonsi/si_compute_prim_discard.c | 2 +- src/gallium/drivers/radeonsi/si_shader.c | 26 +++++++++---------- .../drivers/radeonsi/si_shader_tgsi_mem.c | 3 +-- 7 files changed, 32 insertions(+), 36 deletions(-) diff --git a/src/amd/llvm/ac_llvm_build.c b/src/amd/llvm/ac_llvm_build.c index 35472900e98..60213fdd5d7 100644 --- a/src/amd/llvm/ac_llvm_build.c +++ b/src/amd/llvm/ac_llvm_build.c @@ -1237,8 +1237,7 @@ ac_build_buffer_store_dword(struct ac_llvm_context *ctx, LLVMValueRef voffset, LLVMValueRef soffset, unsigned inst_offset, - unsigned cache_policy, - bool swizzle_enable_hint) + unsigned cache_policy) { /* Split 3 channel stores, because only LLVM 9+ support 3-channel * intrinsics. */ @@ -1252,12 +1251,10 @@ ac_build_buffer_store_dword(struct ac_llvm_context *ctx, v01 = ac_build_gather_values(ctx, v, 2); ac_build_buffer_store_dword(ctx, rsrc, v01, 2, voffset, - soffset, inst_offset, cache_policy, - swizzle_enable_hint); + soffset, inst_offset, cache_policy); ac_build_buffer_store_dword(ctx, rsrc, v[2], 1, voffset, soffset, inst_offset + 8, - cache_policy, - swizzle_enable_hint); + cache_policy); return; } @@ -1265,7 +1262,7 @@ ac_build_buffer_store_dword(struct ac_llvm_context *ctx, * (voffset is swizzled, but soffset isn't swizzled). * llvm.amdgcn.buffer.store doesn't have a separate soffset parameter. */ - if (!swizzle_enable_hint) { + if (!(cache_policy & ac_swizzled)) { LLVMValueRef offset = soffset; if (inst_offset) diff --git a/src/amd/llvm/ac_llvm_build.h b/src/amd/llvm/ac_llvm_build.h index 2357e42035c..8f6d56ab687 100644 --- a/src/amd/llvm/ac_llvm_build.h +++ b/src/amd/llvm/ac_llvm_build.h @@ -299,8 +299,7 @@ ac_build_buffer_store_dword(struct ac_llvm_context *ctx, LLVMValueRef voffset, LLVMValueRef soffset, unsigned inst_offset, - unsigned cache_policy, - bool swizzle_enable_hint); + unsigned cache_policy); void ac_build_buffer_store_format(struct ac_llvm_context *ctx, @@ -533,6 +532,7 @@ enum ac_image_cache_policy { ac_glc = 1 << 0, /* per-CU cache control */ ac_slc = 1 << 1, /* global L2 cache control */ ac_dlc = 1 << 2, /* per-shader-array cache control */ + ac_swizzled = 1 << 3, /* the access is swizzled, disabling load/store merging */ }; struct ac_image_args { diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c index 9e9ddf62555..2eba80a9c38 100644 --- a/src/amd/llvm/ac_nir_to_llvm.c +++ b/src/amd/llvm/ac_nir_to_llvm.c @@ -1650,7 +1650,7 @@ static void visit_store_ssbo(struct ac_nir_context *ctx, ac_build_buffer_store_dword(&ctx->ac, rsrc, data, num_channels, offset, ctx->ac.i32_0, 0, - cache_policy, false); + cache_policy); } } } diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c index 5d87b9a675a..51422cea12b 100644 --- a/src/amd/vulkan/radv_nir_to_llvm.c +++ b/src/amd/vulkan/radv_nir_to_llvm.c @@ -742,13 +742,13 @@ store_tcs_output(struct ac_shader_abi *abi, if (!is_tess_factor && writemask != 0xF) ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, value, 1, buf_addr, oc_lds, - 4 * (base + chan), ac_glc, false); + 4 * (base + chan), ac_glc); } if (writemask == 0xF) { ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, src, 4, buf_addr, oc_lds, - (base * 4), ac_glc, false); + (base * 4), ac_glc); } } @@ -1037,7 +1037,7 @@ visit_emit_vertex(struct ac_shader_abi *abi, unsigned stream, LLVMValueRef *addr voffset, ac_get_arg(&ctx->ac, ctx->args->gs2vs_offset), - 0, ac_glc | ac_slc, true); + 0, ac_glc | ac_slc | ac_swizzled); } } @@ -1768,7 +1768,7 @@ radv_emit_stream_output(struct radv_shader_context *ctx, ac_build_buffer_store_dword(&ctx->ac, so_buffers[buf], vdata, num_comps, so_write_offsets[buf], ctx->ac.i32_0, offset, - ac_glc | ac_slc, false); + ac_glc | ac_slc); } static void @@ -2173,7 +2173,7 @@ handle_es_outputs_post(struct radv_shader_context *ctx, NULL, ac_get_arg(&ctx->ac, ctx->args->es2gs_offset), (4 * param_index + j) * 4, - ac_glc | ac_slc, true); + ac_glc | ac_slc | ac_swizzled); } } } @@ -3635,7 +3635,7 @@ write_tess_factors(struct radv_shader_context *ctx) ac_build_buffer_store_dword(&ctx->ac, buffer, LLVMConstInt(ctx->ac.i32, 0x80000000, false), 1, ctx->ac.i32_0, tf_base, - 0, ac_glc, false); + 0, ac_glc); tf_offset += 4; ac_build_endif(&ctx->ac, 6504); @@ -3644,11 +3644,11 @@ write_tess_factors(struct radv_shader_context *ctx) /* Store the tessellation factors. */ ac_build_buffer_store_dword(&ctx->ac, buffer, vec0, MIN2(stride, 4), byteoffset, tf_base, - tf_offset, ac_glc, false); + tf_offset, ac_glc); if (vec1) ac_build_buffer_store_dword(&ctx->ac, buffer, vec1, stride - 4, byteoffset, tf_base, - 16 + tf_offset, ac_glc, false); + 16 + tf_offset, ac_glc); //store to offchip for TES to read - only if TES reads them if (ctx->args->options->key.tcs.tes_reads_tess_factors) { @@ -3666,7 +3666,7 @@ write_tess_factors(struct radv_shader_context *ctx) ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, outer_vec, outer_comps, tf_outer_offset, ac_get_arg(&ctx->ac, ctx->args->oc_lds), - 0, ac_glc, false); + 0, ac_glc); if (inner_comps) { param_inner = shader_io_get_unique_index(VARYING_SLOT_TESS_LEVEL_INNER); tf_inner_offset = get_tcs_tes_buffer_address(ctx, NULL, @@ -3677,7 +3677,7 @@ write_tess_factors(struct radv_shader_context *ctx) ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, inner_vec, inner_comps, tf_inner_offset, ac_get_arg(&ctx->ac, ctx->args->oc_lds), - 0, ac_glc, false); + 0, ac_glc); } } diff --git a/src/gallium/drivers/radeonsi/si_compute_prim_discard.c b/src/gallium/drivers/radeonsi/si_compute_prim_discard.c index a52966f2376..0bbaf4f306d 100644 --- a/src/gallium/drivers/radeonsi/si_compute_prim_discard.c +++ b/src/gallium/drivers/radeonsi/si_compute_prim_discard.c @@ -822,7 +822,7 @@ void si_build_prim_discard_compute_shader(struct si_shader_context *ctx) }; LLVMValueRef rsrc = ac_build_gather_values(&ctx->ac, desc, 4); ac_build_buffer_store_dword(&ctx->ac, rsrc, count, 1, ctx->i32_0, - ctx->i32_0, 0, ac_glc | ac_slc, false); + ctx->i32_0, 0, ac_glc | ac_slc); } else { LLVMBuildStore(builder, count, si_expand_32bit_pointer(ctx, diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index b63a39efe2d..bad2bfdf130 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -1309,7 +1309,7 @@ static void store_output_tcs(struct lp_build_tgsi_context *bld_base, if (reg->Register.WriteMask != 0xF && !is_tess_factor) { ac_build_buffer_store_dword(&ctx->ac, buffer, value, 1, buf_addr, base, - 4 * chan_index, ac_glc, false); + 4 * chan_index, ac_glc); } /* Write tess factors into VGPRs for the epilog. */ @@ -1329,7 +1329,7 @@ static void store_output_tcs(struct lp_build_tgsi_context *bld_base, LLVMValueRef value = ac_build_gather_values(&ctx->ac, values, 4); ac_build_buffer_store_dword(&ctx->ac, buffer, value, 4, buf_addr, - base, 0, ac_glc, false); + base, 0, ac_glc); } } @@ -1432,7 +1432,7 @@ static void si_nir_store_output_tcs(struct ac_shader_abi *abi, ac_build_buffer_store_dword(&ctx->ac, buffer, value, 1, addr, base, 4 * buffer_store_offset, - ac_glc, false); + ac_glc); } /* Write tess factors into VGPRs for the epilog. */ @@ -1452,7 +1452,7 @@ static void si_nir_store_output_tcs(struct ac_shader_abi *abi, LLVMValueRef value = ac_build_gather_values(&ctx->ac, values, 4); ac_build_buffer_store_dword(&ctx->ac, buffer, value, 4, addr, - base, 0, ac_glc, false); + base, 0, ac_glc); } } @@ -2661,7 +2661,7 @@ void si_emit_streamout_output(struct si_shader_context *ctx, vdata, num_comps, so_write_offsets[buf_idx], ctx->i32_0, - stream_out->dst_offset * 4, ac_glc | ac_slc, false); + stream_out->dst_offset * 4, ac_glc | ac_slc); } /** @@ -3066,7 +3066,7 @@ static void si_copy_tcs_inputs(struct lp_build_tgsi_context *bld_base) LLVMValueRef value = lshs_lds_load(bld_base, ctx->ac.i32, ~0, lds_ptr); ac_build_buffer_store_dword(&ctx->ac, buffer, value, 4, buffer_addr, - buffer_offset, 0, ac_glc, false); + buffer_offset, 0, ac_glc); } } @@ -3191,7 +3191,7 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base, ac_build_buffer_store_dword(&ctx->ac, buffer, LLVMConstInt(ctx->i32, 0x80000000, 0), 1, ctx->i32_0, tf_base, - offset, ac_glc, false); + offset, ac_glc); offset += 4; } @@ -3200,12 +3200,12 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base, /* Store the tessellation factors. */ ac_build_buffer_store_dword(&ctx->ac, buffer, vec0, MIN2(stride, 4), byteoffset, tf_base, - offset, ac_glc, false); + offset, ac_glc); offset += 16; if (vec1) ac_build_buffer_store_dword(&ctx->ac, buffer, vec1, stride - 4, byteoffset, tf_base, - offset, ac_glc, false); + offset, ac_glc); /* Store the tess factors into the offchip buffer if TES reads them. */ if (shader->key.part.tcs.epilog.tes_reads_tess_factors) { @@ -3228,7 +3228,7 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base, ac_build_buffer_store_dword(&ctx->ac, buf, outer_vec, outer_comps, tf_outer_offset, - base, 0, ac_glc, false); + base, 0, ac_glc); if (inner_comps) { param_inner = si_shader_io_get_unique_index_patch( TGSI_SEMANTIC_TESSINNER, 0); @@ -3239,7 +3239,7 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base, ac_build_gather_values(&ctx->ac, inner, inner_comps); ac_build_buffer_store_dword(&ctx->ac, buf, inner_vec, inner_comps, tf_inner_offset, - base, 0, ac_glc, false); + base, 0, ac_glc); } } @@ -3554,7 +3554,7 @@ static void si_llvm_emit_es_epilogue(struct ac_shader_abi *abi, out_val, 1, NULL, ac_get_arg(&ctx->ac, ctx->es2gs_offset), (4 * param + chan) * 4, - ac_glc | ac_slc, true); + ac_glc | ac_slc | ac_swizzled); } } @@ -4283,7 +4283,7 @@ static void si_llvm_emit_vertex(struct ac_shader_abi *abi, ctx->gsvs_ring[stream], out_val, 1, voffset, soffset, 0, - ac_glc | ac_slc, true); + ac_glc | ac_slc | ac_swizzled); } } diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c index 67db98d6fed..21b861b8244 100644 --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c @@ -649,8 +649,7 @@ static void store_emit_buffer(struct si_shader_context *ctx, } ac_build_buffer_store_dword(&ctx->ac, resource, data, count, - voff, ctx->i32_0, 0, cache_policy, - false); + voff, ctx->i32_0, 0, cache_policy); } } -- 2.30.2