switch (target) {
case TGSI_TEXTURE_1D:
case TGSI_TEXTURE_SHADOW1D:
- if (screen->info.chip_class >= GFX9)
+ if (screen->info.chip_class == GFX9)
return ac_image_2d;
return ac_image_1d;
case TGSI_TEXTURE_2D:
return ac_image_cube;
case TGSI_TEXTURE_1D_ARRAY:
case TGSI_TEXTURE_SHADOW1D_ARRAY:
- if (screen->info.chip_class >= GFX9)
+ if (screen->info.chip_class == GFX9)
return ac_image_2darray;
return ac_image_1darray;
case TGSI_TEXTURE_2D_ARRAY:
if (dim == ac_image_cube ||
(screen->info.chip_class <= GFX8 && dim == ac_image_3d))
dim = ac_image_2darray;
- else if (target == TGSI_TEXTURE_2D && screen->info.chip_class >= GFX9) {
+ else if (target == TGSI_TEXTURE_2D && screen->info.chip_class == GFX9) {
/* When a single layer of a 3D texture is bound, the shader
* will refer to a 2D target, but the descriptor has a 3D type.
* Since the HW ignores BASE_ARRAY in this case, we need to
LLVMValueRef si_load_image_desc(struct si_shader_context *ctx,
LLVMValueRef list, LLVMValueRef index,
- enum ac_descriptor_type desc_type, bool dcc_off,
- bool bindless)
+ enum ac_descriptor_type desc_type,
+ bool uses_store, bool bindless)
{
LLVMBuilderRef builder = ctx->ac.builder;
LLVMValueRef rsrc;
else
rsrc = ac_build_load_to_sgpr(&ctx->ac, list, index);
- if (desc_type == AC_DESC_IMAGE && dcc_off)
+ if (ctx->ac.chip_class <= GFX9 &&
+ desc_type == AC_DESC_IMAGE && uses_store)
rsrc = force_dcc_off(ctx, rsrc);
return rsrc;
}
LLVMValueRef rsrc_ptr = LLVMGetParam(ctx->main_fn,
ctx->param_samplers_and_images);
LLVMValueRef index;
- bool dcc_off = is_store;
if (!image->Register.Indirect) {
index = LLVMConstInt(ctx->i32,
*rsrc = si_load_image_desc(ctx, rsrc_ptr, index,
target == TGSI_TEXTURE_BUFFER ? AC_DESC_BUFFER : AC_DESC_IMAGE,
- dcc_off, bindless);
+ is_store, bindless);
}
static void image_fetch_coords(
coords[chan] = tmp;
}
- if (ctx->screen->info.chip_class >= GFX9) {
+ if (ctx->screen->info.chip_class == GFX9) {
/* 1D textures are allocated and used as 2D on GFX9. */
if (target == TGSI_TEXTURE_1D) {
coords[1] = ctx->i32_0;
* evicting L1 cache lines that may be needed by other
* instructions. */
writeonly_memory ||
- inst->Memory.Qualifier & (TGSI_MEMORY_COHERENT | TGSI_MEMORY_VOLATILE)))
+ inst->Memory.Qualifier & (TGSI_MEMORY_COHERENT | TGSI_MEMORY_VOLATILE))) {
cache_policy |= ac_glc;
+ }
if (inst->Memory.Qualifier & TGSI_MEMORY_STREAM_CACHE_POLICY)
cache_policy |= ac_slc;
emit_data->output[emit_data->chan] =
ac_build_buffer_load(&ctx->ac, args.resource,
util_last_bit(inst->Dst[0].Register.WriteMask),
- NULL, voffset, NULL, 0, 0, 0, true, true);
+ NULL, voffset, NULL, 0, 0, true, true);
return;
}
if (inst->Memory.Qualifier & TGSI_MEMORY_VOLATILE)
- ac_build_waitcnt(&ctx->ac, VM_CNT);
+ ac_build_waitcnt(&ctx->ac, AC_WAIT_VLOAD | AC_WAIT_VSTORE);
can_speculate = !(inst->Memory.Qualifier & TGSI_MEMORY_VOLATILE) &&
is_oneway_access_only(inst, info,
ac_build_buffer_load(&ctx->ac, args.resource,
util_last_bit(inst->Dst[0].Register.WriteMask),
NULL, voffset, NULL, 0,
- !!(args.cache_policy & ac_glc),
- !!(args.cache_policy & ac_slc),
- can_speculate, false);
+ args.cache_policy, can_speculate, false);
return;
}
vindex,
ctx->i32_0,
num_channels,
- !!(args.cache_policy & ac_glc),
+ args.cache_policy,
can_speculate);
emit_data->output[emit_data->chan] =
ac_build_expand_to_vec4(&ctx->ac, result, num_channels);
}
ac_build_buffer_store_dword(&ctx->ac, resource, data, count,
- voff, ctx->i32_0, 0,
- !!(cache_policy & ac_glc),
- !!(cache_policy & ac_slc),
+ voff, ctx->i32_0, 0, cache_policy,
false);
}
}
}
if (inst->Memory.Qualifier & TGSI_MEMORY_VOLATILE)
- ac_build_waitcnt(&ctx->ac, VM_CNT);
+ ac_build_waitcnt(&ctx->ac, AC_WAIT_VLOAD | AC_WAIT_VSTORE);
bool is_image = inst->Dst[0].Register.File != TGSI_FILE_BUFFER;
args.cache_policy = get_cache_policy(ctx, inst,
ac_build_gather_values(&ctx->ac, chans, num_channels),
vindex, ctx->i32_0 /* voffset */,
num_channels,
- !!(args.cache_policy & ac_glc),
- false);
+ args.cache_policy);
} else {
args.opcode = ac_image_store;
args.data[0] = ac_build_gather_values(&ctx->ac, chans, 4);
LLVMBuilderRef builder = ctx->ac.builder;
/* 1D textures are allocated and used as 2D on GFX9. */
- if (ctx->screen->info.chip_class >= GFX9 &&
+ if (ctx->screen->info.chip_class == GFX9 &&
(target == TGSI_TEXTURE_1D_ARRAY ||
target == TGSI_TEXTURE_SHADOW1D_ARRAY)) {
LLVMValueRef layers =
args.resource,
vindex,
ctx->i32_0,
- num_channels, false, true);
+ num_channels, 0, true);
emit_data->output[emit_data->chan] =
ac_build_expand_to_vec4(&ctx->ac, result, num_channels);
return;
*
* TC-compatible HTILE promotes Z16 and Z24 to Z32_FLOAT,
* so the depth comparison value isn't clamped for Z16 and
- * Z24 anymore. Do it manually here.
+ * Z24 anymore. Do it manually here for GFX8-9; GFX10 has
+ * an explicitly clamped 32-bit float format.
*/
- if (ctx->screen->info.chip_class >= GFX8) {
+ if (ctx->screen->info.chip_class >= GFX8 &&
+ ctx->screen->info.chip_class <= GFX9) {
LLVMValueRef upgraded;
LLVMValueRef clamped;
upgraded = LLVMBuildExtractElement(ctx->ac.builder, args.sampler,
num_src_deriv_channels = 1;
/* 1D textures are allocated and used as 2D on GFX9. */
- if (ctx->screen->info.chip_class >= GFX9) {
+ if (ctx->screen->info.chip_class == GFX9) {
num_dst_deriv_channels = 2;
} else {
num_dst_deriv_channels = 1;
}
/* 1D textures are allocated and used as 2D on GFX9. */
- if (ctx->screen->info.chip_class >= GFX9) {
+ if (ctx->screen->info.chip_class == GFX9) {
LLVMValueRef filler;
/* Use 0.5, so that we don't sample the border color. */
}
}
- if (target == TGSI_TEXTURE_2D_MSAA ||
- target == TGSI_TEXTURE_2D_ARRAY_MSAA) {
+ if ((target == TGSI_TEXTURE_2D_MSAA ||
+ target == TGSI_TEXTURE_2D_ARRAY_MSAA) &&
+ !(ctx->screen->debug_flags & DBG(NO_FMASK))) {
ac_apply_fmask_to_sample(&ctx->ac, fmask_ptr, args.coords,
target == TGSI_TEXTURE_2D_ARRAY_MSAA);
}
if (ctx->shader->key.mono.u.ps.fbfetch_msaa)
args.coords[chan++] = si_get_sample_id(ctx);
- if (ctx->shader->key.mono.u.ps.fbfetch_msaa) {
+ if (ctx->shader->key.mono.u.ps.fbfetch_msaa &&
+ !(ctx->screen->debug_flags & DBG(NO_FMASK))) {
fmask = ac_build_load_to_sgpr(&ctx->ac, ptr,
LLVMConstInt(ctx->i32, SI_PS_IMAGE_COLORBUF0_FMASK / 2, 0));