LLVM now merges loads and stores for all opcodes, so the swizzled flag (ac_swizzled) must be set in the cache policy of swizzled accesses to keep them from being merged.
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
LLVMValueRef voffset,
LLVMValueRef soffset,
unsigned inst_offset,
- unsigned cache_policy,
- bool swizzle_enable_hint)
+ unsigned cache_policy)
{
/* Split 3 channel stores, because only LLVM 9+ support 3-channel
* intrinsics. */
v01 = ac_build_gather_values(ctx, v, 2);
ac_build_buffer_store_dword(ctx, rsrc, v01, 2, voffset,
- soffset, inst_offset, cache_policy,
- swizzle_enable_hint);
+ soffset, inst_offset, cache_policy);
ac_build_buffer_store_dword(ctx, rsrc, v[2], 1, voffset,
soffset, inst_offset + 8,
- cache_policy,
- swizzle_enable_hint);
+ cache_policy);
return;
}
* (voffset is swizzled, but soffset isn't swizzled).
* llvm.amdgcn.buffer.store doesn't have a separate soffset parameter.
*/
- if (!swizzle_enable_hint) {
+ if (!(cache_policy & ac_swizzled)) {
LLVMValueRef offset = soffset;
if (inst_offset)
LLVMValueRef voffset,
LLVMValueRef soffset,
unsigned inst_offset,
- unsigned cache_policy,
- bool swizzle_enable_hint);
+ unsigned cache_policy);
void
ac_build_buffer_store_format(struct ac_llvm_context *ctx,
ac_glc = 1 << 0, /* per-CU cache control */
ac_slc = 1 << 1, /* global L2 cache control */
ac_dlc = 1 << 2, /* per-shader-array cache control */
+ ac_swizzled = 1 << 3, /* the access is swizzled, disabling load/store merging */
};
struct ac_image_args {
ac_build_buffer_store_dword(&ctx->ac, rsrc, data,
num_channels, offset,
ctx->ac.i32_0, 0,
- cache_policy, false);
+ cache_policy);
}
}
}
if (!is_tess_factor && writemask != 0xF)
ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, value, 1,
buf_addr, oc_lds,
- 4 * (base + chan), ac_glc, false);
+ 4 * (base + chan), ac_glc);
}
if (writemask == 0xF) {
ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, src, 4,
buf_addr, oc_lds,
- (base * 4), ac_glc, false);
+ (base * 4), ac_glc);
}
}
voffset,
ac_get_arg(&ctx->ac,
ctx->args->gs2vs_offset),
- 0, ac_glc | ac_slc, true);
+ 0, ac_glc | ac_slc | ac_swizzled);
}
}
ac_build_buffer_store_dword(&ctx->ac, so_buffers[buf],
vdata, num_comps, so_write_offsets[buf],
ctx->ac.i32_0, offset,
- ac_glc | ac_slc, false);
+ ac_glc | ac_slc);
}
static void
NULL,
ac_get_arg(&ctx->ac, ctx->args->es2gs_offset),
(4 * param_index + j) * 4,
- ac_glc | ac_slc, true);
+ ac_glc | ac_slc | ac_swizzled);
}
}
}
ac_build_buffer_store_dword(&ctx->ac, buffer,
LLVMConstInt(ctx->ac.i32, 0x80000000, false),
1, ctx->ac.i32_0, tf_base,
- 0, ac_glc, false);
+ 0, ac_glc);
tf_offset += 4;
ac_build_endif(&ctx->ac, 6504);
/* Store the tessellation factors. */
ac_build_buffer_store_dword(&ctx->ac, buffer, vec0,
MIN2(stride, 4), byteoffset, tf_base,
- tf_offset, ac_glc, false);
+ tf_offset, ac_glc);
if (vec1)
ac_build_buffer_store_dword(&ctx->ac, buffer, vec1,
stride - 4, byteoffset, tf_base,
- 16 + tf_offset, ac_glc, false);
+ 16 + tf_offset, ac_glc);
//store to offchip for TES to read - only if TES reads them
if (ctx->args->options->key.tcs.tes_reads_tess_factors) {
ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, outer_vec,
outer_comps, tf_outer_offset,
ac_get_arg(&ctx->ac, ctx->args->oc_lds),
- 0, ac_glc, false);
+ 0, ac_glc);
if (inner_comps) {
param_inner = shader_io_get_unique_index(VARYING_SLOT_TESS_LEVEL_INNER);
tf_inner_offset = get_tcs_tes_buffer_address(ctx, NULL,
ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, inner_vec,
inner_comps, tf_inner_offset,
ac_get_arg(&ctx->ac, ctx->args->oc_lds),
- 0, ac_glc, false);
+ 0, ac_glc);
}
}
};
LLVMValueRef rsrc = ac_build_gather_values(&ctx->ac, desc, 4);
ac_build_buffer_store_dword(&ctx->ac, rsrc, count, 1, ctx->i32_0,
- ctx->i32_0, 0, ac_glc | ac_slc, false);
+ ctx->i32_0, 0, ac_glc | ac_slc);
} else {
LLVMBuildStore(builder, count,
si_expand_32bit_pointer(ctx,
if (reg->Register.WriteMask != 0xF && !is_tess_factor) {
ac_build_buffer_store_dword(&ctx->ac, buffer, value, 1,
buf_addr, base,
- 4 * chan_index, ac_glc, false);
+ 4 * chan_index, ac_glc);
}
/* Write tess factors into VGPRs for the epilog. */
LLVMValueRef value = ac_build_gather_values(&ctx->ac,
values, 4);
ac_build_buffer_store_dword(&ctx->ac, buffer, value, 4, buf_addr,
- base, 0, ac_glc, false);
+ base, 0, ac_glc);
}
}
ac_build_buffer_store_dword(&ctx->ac, buffer, value, 1,
addr, base,
4 * buffer_store_offset,
- ac_glc, false);
+ ac_glc);
}
/* Write tess factors into VGPRs for the epilog. */
LLVMValueRef value = ac_build_gather_values(&ctx->ac,
values, 4);
ac_build_buffer_store_dword(&ctx->ac, buffer, value, 4, addr,
- base, 0, ac_glc, false);
+ base, 0, ac_glc);
}
}
vdata, num_comps,
so_write_offsets[buf_idx],
ctx->i32_0,
- stream_out->dst_offset * 4, ac_glc | ac_slc, false);
+ stream_out->dst_offset * 4, ac_glc | ac_slc);
}
/**
LLVMValueRef value = lshs_lds_load(bld_base, ctx->ac.i32, ~0, lds_ptr);
ac_build_buffer_store_dword(&ctx->ac, buffer, value, 4, buffer_addr,
- buffer_offset, 0, ac_glc, false);
+ buffer_offset, 0, ac_glc);
}
}
ac_build_buffer_store_dword(&ctx->ac, buffer,
LLVMConstInt(ctx->i32, 0x80000000, 0),
1, ctx->i32_0, tf_base,
- offset, ac_glc, false);
+ offset, ac_glc);
offset += 4;
}
/* Store the tessellation factors. */
ac_build_buffer_store_dword(&ctx->ac, buffer, vec0,
MIN2(stride, 4), byteoffset, tf_base,
- offset, ac_glc, false);
+ offset, ac_glc);
offset += 16;
if (vec1)
ac_build_buffer_store_dword(&ctx->ac, buffer, vec1,
stride - 4, byteoffset, tf_base,
- offset, ac_glc, false);
+ offset, ac_glc);
/* Store the tess factors into the offchip buffer if TES reads them. */
if (shader->key.part.tcs.epilog.tes_reads_tess_factors) {
ac_build_buffer_store_dword(&ctx->ac, buf, outer_vec,
outer_comps, tf_outer_offset,
- base, 0, ac_glc, false);
+ base, 0, ac_glc);
if (inner_comps) {
param_inner = si_shader_io_get_unique_index_patch(
TGSI_SEMANTIC_TESSINNER, 0);
ac_build_gather_values(&ctx->ac, inner, inner_comps);
ac_build_buffer_store_dword(&ctx->ac, buf, inner_vec,
inner_comps, tf_inner_offset,
- base, 0, ac_glc, false);
+ base, 0, ac_glc);
}
}
out_val, 1, NULL,
ac_get_arg(&ctx->ac, ctx->es2gs_offset),
(4 * param + chan) * 4,
- ac_glc | ac_slc, true);
+ ac_glc | ac_slc | ac_swizzled);
}
}
ctx->gsvs_ring[stream],
out_val, 1,
voffset, soffset, 0,
- ac_glc | ac_slc, true);
+ ac_glc | ac_slc | ac_swizzled);
}
}
}
ac_build_buffer_store_dword(&ctx->ac, resource, data, count,
- voff, ctx->i32_0, 0, cache_policy,
- false);
+ voff, ctx->i32_0, 0, cache_policy);
}
}