ac: set swizzled bit in cache policy as a hint not to merge loads/stores
author Marek Olšák <marek.olsak@amd.com>
Fri, 22 Nov 2019 22:41:22 +0000 (17:41 -0500)
committer Marek Olšák <marek.olsak@amd.com>
Mon, 25 Nov 2019 21:48:27 +0000 (16:48 -0500)
LLVM now merges loads and stores for all opcodes, so the swizzled bit must be
set in the cache policy of swizzled accesses as a hint not to merge them.

Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
src/amd/llvm/ac_llvm_build.c
src/amd/llvm/ac_llvm_build.h
src/amd/llvm/ac_nir_to_llvm.c
src/amd/vulkan/radv_nir_to_llvm.c
src/gallium/drivers/radeonsi/si_compute_prim_discard.c
src/gallium/drivers/radeonsi/si_shader.c
src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c

index 35472900e98dda7920190be32e201fef559c4e0f..60213fdd5d76fe4985701beb1cc42396a759ba6b 100644 (file)
@@ -1237,8 +1237,7 @@ ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
                            LLVMValueRef voffset,
                            LLVMValueRef soffset,
                            unsigned inst_offset,
-                           unsigned cache_policy,
-                           bool swizzle_enable_hint)
+                           unsigned cache_policy)
 {
        /* Split 3 channel stores, because only LLVM 9+ support 3-channel
         * intrinsics. */
@@ -1252,12 +1251,10 @@ ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
                v01 = ac_build_gather_values(ctx, v, 2);
 
                ac_build_buffer_store_dword(ctx, rsrc, v01, 2, voffset,
-                                           soffset, inst_offset, cache_policy,
-                                           swizzle_enable_hint);
+                                           soffset, inst_offset, cache_policy);
                ac_build_buffer_store_dword(ctx, rsrc, v[2], 1, voffset,
                                            soffset, inst_offset + 8,
-                                           cache_policy,
-                                           swizzle_enable_hint);
+                                           cache_policy);
                return;
        }
 
@@ -1265,7 +1262,7 @@ ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
         * (voffset is swizzled, but soffset isn't swizzled).
         * llvm.amdgcn.buffer.store doesn't have a separate soffset parameter.
         */
-       if (!swizzle_enable_hint) {
+       if (!(cache_policy & ac_swizzled)) {
                LLVMValueRef offset = soffset;
 
                if (inst_offset)
index 2357e42035c126edd662e55957e420a139b33222..8f6d56ab687ae9cbdb3cc7b748ac689adef6cfc6 100644 (file)
@@ -299,8 +299,7 @@ ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
                            LLVMValueRef voffset,
                            LLVMValueRef soffset,
                            unsigned inst_offset,
-                           unsigned cache_policy,
-                           bool swizzle_enable_hint);
+                           unsigned cache_policy);
 
 void
 ac_build_buffer_store_format(struct ac_llvm_context *ctx,
@@ -533,6 +532,7 @@ enum ac_image_cache_policy {
        ac_glc = 1 << 0, /* per-CU cache control */
        ac_slc = 1 << 1, /* global L2 cache control */
        ac_dlc = 1 << 2, /* per-shader-array cache control */
+       ac_swizzled = 1 << 3, /* the access is swizzled, disabling load/store merging */
 };
 
 struct ac_image_args {
index 9e9ddf62555dd63b9673aef998a99c08a0a2cf57..2eba80a9c3893fba1cec253eaac3d651e0c62fcc 100644 (file)
@@ -1650,7 +1650,7 @@ static void visit_store_ssbo(struct ac_nir_context *ctx,
                        ac_build_buffer_store_dword(&ctx->ac, rsrc, data,
                                                    num_channels, offset,
                                                    ctx->ac.i32_0, 0,
-                                                   cache_policy, false);
+                                                   cache_policy);
                }
        }
 }
index 5d87b9a675a05c18d788ed276a340b3cc53d5755..51422cea12bccd74f8f4f8f3dab9b21dade5157a 100644 (file)
@@ -742,13 +742,13 @@ store_tcs_output(struct ac_shader_abi *abi,
                if (!is_tess_factor && writemask != 0xF)
                        ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, value, 1,
                                                    buf_addr, oc_lds,
-                                                   4 * (base + chan), ac_glc, false);
+                                                   4 * (base + chan), ac_glc);
        }
 
        if (writemask == 0xF) {
                ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, src, 4,
                                            buf_addr, oc_lds,
-                                           (base * 4), ac_glc, false);
+                                           (base * 4), ac_glc);
        }
 }
 
@@ -1037,7 +1037,7 @@ visit_emit_vertex(struct ac_shader_abi *abi, unsigned stream, LLVMValueRef *addr
                                                    voffset,
                                                    ac_get_arg(&ctx->ac,
                                                               ctx->args->gs2vs_offset),
-                                                   0, ac_glc | ac_slc, true);
+                                                   0, ac_glc | ac_slc | ac_swizzled);
                }
        }
 
@@ -1768,7 +1768,7 @@ radv_emit_stream_output(struct radv_shader_context *ctx,
        ac_build_buffer_store_dword(&ctx->ac, so_buffers[buf],
                                    vdata, num_comps, so_write_offsets[buf],
                                    ctx->ac.i32_0, offset,
-                                   ac_glc | ac_slc, false);
+                                   ac_glc | ac_slc);
 }
 
 static void
@@ -2173,7 +2173,7 @@ handle_es_outputs_post(struct radv_shader_context *ctx,
                                                            NULL,
                                                            ac_get_arg(&ctx->ac, ctx->args->es2gs_offset),
                                                            (4 * param_index + j) * 4,
-                                                           ac_glc | ac_slc, true);
+                                                           ac_glc | ac_slc | ac_swizzled);
                        }
                }
        }
@@ -3635,7 +3635,7 @@ write_tess_factors(struct radv_shader_context *ctx)
                ac_build_buffer_store_dword(&ctx->ac, buffer,
                                            LLVMConstInt(ctx->ac.i32, 0x80000000, false),
                                            1, ctx->ac.i32_0, tf_base,
-                                           0, ac_glc, false);
+                                           0, ac_glc);
                tf_offset += 4;
 
                ac_build_endif(&ctx->ac, 6504);
@@ -3644,11 +3644,11 @@ write_tess_factors(struct radv_shader_context *ctx)
        /* Store the tessellation factors. */
        ac_build_buffer_store_dword(&ctx->ac, buffer, vec0,
                                    MIN2(stride, 4), byteoffset, tf_base,
-                                   tf_offset, ac_glc, false);
+                                   tf_offset, ac_glc);
        if (vec1)
                ac_build_buffer_store_dword(&ctx->ac, buffer, vec1,
                                            stride - 4, byteoffset, tf_base,
-                                           16 + tf_offset, ac_glc, false);
+                                           16 + tf_offset, ac_glc);
 
        //store to offchip for TES to read - only if TES reads them
        if (ctx->args->options->key.tcs.tes_reads_tess_factors) {
@@ -3666,7 +3666,7 @@ write_tess_factors(struct radv_shader_context *ctx)
                ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, outer_vec,
                                            outer_comps, tf_outer_offset,
                                            ac_get_arg(&ctx->ac, ctx->args->oc_lds),
-                                           0, ac_glc, false);
+                                           0, ac_glc);
                if (inner_comps) {
                        param_inner = shader_io_get_unique_index(VARYING_SLOT_TESS_LEVEL_INNER);
                        tf_inner_offset = get_tcs_tes_buffer_address(ctx, NULL,
@@ -3677,7 +3677,7 @@ write_tess_factors(struct radv_shader_context *ctx)
                        ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, inner_vec,
                                                    inner_comps, tf_inner_offset,
                                                    ac_get_arg(&ctx->ac, ctx->args->oc_lds),
-                                                   0, ac_glc, false);
+                                                   0, ac_glc);
                }
        }
        
index a52966f2376ee6fa3c9e883f233f83eaf11784a7..0bbaf4f306da1d2105d1d7a181f932d226d9f237 100644 (file)
@@ -822,7 +822,7 @@ void si_build_prim_discard_compute_shader(struct si_shader_context *ctx)
                        };
                        LLVMValueRef rsrc = ac_build_gather_values(&ctx->ac, desc, 4);
                        ac_build_buffer_store_dword(&ctx->ac, rsrc, count, 1, ctx->i32_0,
-                                                   ctx->i32_0, 0, ac_glc | ac_slc, false);
+                                                   ctx->i32_0, 0, ac_glc | ac_slc);
                } else {
                        LLVMBuildStore(builder, count,
                                       si_expand_32bit_pointer(ctx,
index b63a39efe2da5e67a492a4608030c6759e9bd423..bad2bfdf130b72ffa0fdc79154e40df840d1b654 100644 (file)
@@ -1309,7 +1309,7 @@ static void store_output_tcs(struct lp_build_tgsi_context *bld_base,
                if (reg->Register.WriteMask != 0xF && !is_tess_factor) {
                        ac_build_buffer_store_dword(&ctx->ac, buffer, value, 1,
                                                    buf_addr, base,
-                                                   4 * chan_index, ac_glc, false);
+                                                   4 * chan_index, ac_glc);
                }
 
                /* Write tess factors into VGPRs for the epilog. */
@@ -1329,7 +1329,7 @@ static void store_output_tcs(struct lp_build_tgsi_context *bld_base,
                LLVMValueRef value = ac_build_gather_values(&ctx->ac,
                                                            values, 4);
                ac_build_buffer_store_dword(&ctx->ac, buffer, value, 4, buf_addr,
-                                           base, 0, ac_glc, false);
+                                           base, 0, ac_glc);
        }
 }
 
@@ -1432,7 +1432,7 @@ static void si_nir_store_output_tcs(struct ac_shader_abi *abi,
                        ac_build_buffer_store_dword(&ctx->ac, buffer, value, 1,
                                                    addr, base,
                                                    4 * buffer_store_offset,
-                                                    ac_glc, false);
+                                                    ac_glc);
                }
 
                /* Write tess factors into VGPRs for the epilog. */
@@ -1452,7 +1452,7 @@ static void si_nir_store_output_tcs(struct ac_shader_abi *abi,
                LLVMValueRef value = ac_build_gather_values(&ctx->ac,
                                                            values, 4);
                ac_build_buffer_store_dword(&ctx->ac, buffer, value, 4, addr,
-                                           base, 0, ac_glc, false);
+                                           base, 0, ac_glc);
        }
 }
 
@@ -2661,7 +2661,7 @@ void si_emit_streamout_output(struct si_shader_context *ctx,
                                    vdata, num_comps,
                                    so_write_offsets[buf_idx],
                                    ctx->i32_0,
-                                   stream_out->dst_offset * 4, ac_glc | ac_slc, false);
+                                   stream_out->dst_offset * 4, ac_glc | ac_slc);
 }
 
 /**
@@ -3066,7 +3066,7 @@ static void si_copy_tcs_inputs(struct lp_build_tgsi_context *bld_base)
                LLVMValueRef value = lshs_lds_load(bld_base, ctx->ac.i32, ~0, lds_ptr);
 
                ac_build_buffer_store_dword(&ctx->ac, buffer, value, 4, buffer_addr,
-                                           buffer_offset, 0, ac_glc, false);
+                                           buffer_offset, 0, ac_glc);
        }
 }
 
@@ -3191,7 +3191,7 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base,
                ac_build_buffer_store_dword(&ctx->ac, buffer,
                                            LLVMConstInt(ctx->i32, 0x80000000, 0),
                                            1, ctx->i32_0, tf_base,
-                                           offset, ac_glc, false);
+                                           offset, ac_glc);
                offset += 4;
        }
 
@@ -3200,12 +3200,12 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base,
        /* Store the tessellation factors. */
        ac_build_buffer_store_dword(&ctx->ac, buffer, vec0,
                                    MIN2(stride, 4), byteoffset, tf_base,
-                                   offset, ac_glc, false);
+                                   offset, ac_glc);
        offset += 16;
        if (vec1)
                ac_build_buffer_store_dword(&ctx->ac, buffer, vec1,
                                            stride - 4, byteoffset, tf_base,
-                                           offset, ac_glc, false);
+                                           offset, ac_glc);
 
        /* Store the tess factors into the offchip buffer if TES reads them. */
        if (shader->key.part.tcs.epilog.tes_reads_tess_factors) {
@@ -3228,7 +3228,7 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base,
 
                ac_build_buffer_store_dword(&ctx->ac, buf, outer_vec,
                                            outer_comps, tf_outer_offset,
-                                           base, 0, ac_glc, false);
+                                           base, 0, ac_glc);
                if (inner_comps) {
                        param_inner = si_shader_io_get_unique_index_patch(
                                              TGSI_SEMANTIC_TESSINNER, 0);
@@ -3239,7 +3239,7 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base,
                                    ac_build_gather_values(&ctx->ac, inner, inner_comps);
                        ac_build_buffer_store_dword(&ctx->ac, buf, inner_vec,
                                                    inner_comps, tf_inner_offset,
-                                                   base, 0, ac_glc, false);
+                                                   base, 0, ac_glc);
                }
        }
 
@@ -3554,7 +3554,7 @@ static void si_llvm_emit_es_epilogue(struct ac_shader_abi *abi,
                                                    out_val, 1, NULL,
                                                    ac_get_arg(&ctx->ac, ctx->es2gs_offset),
                                                    (4 * param + chan) * 4,
-                                                   ac_glc | ac_slc, true);
+                                                   ac_glc | ac_slc | ac_swizzled);
                }
        }
 
@@ -4283,7 +4283,7 @@ static void si_llvm_emit_vertex(struct ac_shader_abi *abi,
                                                    ctx->gsvs_ring[stream],
                                                    out_val, 1,
                                                    voffset, soffset, 0,
-                                                   ac_glc | ac_slc, true);
+                                                   ac_glc | ac_slc | ac_swizzled);
                }
        }
 
index 67db98d6fed69162b182ea807a189aeecd3d8b48..21b861b82448be36b79ef0508b7c8abcdb64a6f7 100644 (file)
@@ -649,8 +649,7 @@ static void store_emit_buffer(struct si_shader_context *ctx,
                }
 
                ac_build_buffer_store_dword(&ctx->ac, resource, data, count,
-                                           voff, ctx->i32_0, 0, cache_policy,
-                                           false);
+                                           voff, ctx->i32_0, 0, cache_policy);
        }
 }