radeonsi: apply a TC L1 write corruption workaround for SI
authorMarek Olšák <marek.olsak@amd.com>
Tue, 29 Nov 2016 19:16:50 +0000 (20:16 +0100)
committerMarek Olšák <marek.olsak@amd.com>
Thu, 1 Dec 2016 01:16:51 +0000 (02:16 +0100)
Cc: 13.0 <mesa-stable@lists.freedesktop.org>
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
src/gallium/drivers/radeonsi/si_shader.c

index 80c063aaeeccb5ff9946fa6a73ff069bf14db20b..ee2fce16b756aeb8e4da1b55d78ac29ed0a049fb 100644 (file)
@@ -3382,7 +3382,8 @@ static void image_append_args(
                struct si_shader_context *ctx,
                struct lp_build_emit_data * emit_data,
                unsigned target,
-               bool atomic)
+               bool atomic,
+               bool force_glc)
 {
        const struct tgsi_full_instruction *inst = emit_data->inst;
        LLVMValueRef i1false = LLVMConstInt(ctx->i1, 0, 0);
@@ -3390,6 +3391,7 @@ static void image_append_args(
        LLVMValueRef r128 = i1false;
        LLVMValueRef da = tgsi_is_array_image(target) ? i1true : i1false;
        LLVMValueRef glc =
+               force_glc ||
                inst->Memory.Qualifier & (TGSI_MEMORY_COHERENT | TGSI_MEMORY_VOLATILE) ?
                i1true : i1false;
        LLVMValueRef slc = i1false;
@@ -3425,7 +3427,8 @@ static void buffer_append_args(
                LLVMValueRef rsrc,
                LLVMValueRef index,
                LLVMValueRef offset,
-               bool atomic)
+               bool atomic,
+               bool force_glc)
 {
        const struct tgsi_full_instruction *inst = emit_data->inst;
        LLVMValueRef i1false = LLVMConstInt(ctx->i1, 0, 0);
@@ -3436,6 +3439,7 @@ static void buffer_append_args(
        emit_data->args[emit_data->arg_count++] = offset; /* voffset */
        if (!atomic) {
                emit_data->args[emit_data->arg_count++] =
+                       force_glc ||
                        inst->Memory.Qualifier & (TGSI_MEMORY_COHERENT | TGSI_MEMORY_VOLATILE) ?
                        i1true : i1false; /* glc */
        }
@@ -3465,7 +3469,7 @@ static void load_fetch_args(
                offset = LLVMBuildBitCast(builder, tmp, bld_base->uint_bld.elem_type, "");
 
                buffer_append_args(ctx, emit_data, rsrc, bld_base->uint_bld.zero,
-                                  offset, false);
+                                  offset, false, false);
        } else if (inst->Src[0].Register.File == TGSI_FILE_IMAGE) {
                LLVMValueRef coords;
 
@@ -3474,14 +3478,14 @@ static void load_fetch_args(
 
                if (target == TGSI_TEXTURE_BUFFER) {
                        buffer_append_args(ctx, emit_data, rsrc, coords,
-                                       bld_base->uint_bld.zero, false);
+                                          bld_base->uint_bld.zero, false, false);
                } else {
                        emit_data->args[0] = coords;
                        emit_data->args[1] = rsrc;
                        emit_data->args[2] = lp_build_const_int32(gallivm, 15); /* dmask */
                        emit_data->arg_count = 3;
 
-                       image_append_args(ctx, emit_data, target, false);
+                       image_append_args(ctx, emit_data, target, false, false);
                }
        }
 }
@@ -3671,17 +3675,25 @@ static void store_fetch_args(
                offset = LLVMBuildBitCast(builder, tmp, bld_base->uint_bld.elem_type, "");
 
                buffer_append_args(ctx, emit_data, rsrc, bld_base->uint_bld.zero,
-                                  offset, false);
+                                  offset, false, false);
        } else if (inst->Dst[0].Register.File == TGSI_FILE_IMAGE) {
                unsigned target = inst->Memory.Texture;
                LLVMValueRef coords;
 
+               /* 8bit/16bit TC L1 write corruption bug on SI.
+                * All store opcodes not aligned to a dword are affected.
+                *
+                * The only way to get unaligned stores in radeonsi is through
+                * shader images.
+                */
+               bool force_glc = ctx->screen->b.chip_class == SI;
+
                coords = image_fetch_coords(bld_base, inst, 0);
 
                if (target == TGSI_TEXTURE_BUFFER) {
                        image_fetch_rsrc(bld_base, &memory, true, target, &rsrc);
                        buffer_append_args(ctx, emit_data, rsrc, coords,
-                                       bld_base->uint_bld.zero, false);
+                                          bld_base->uint_bld.zero, false, force_glc);
                } else {
                        emit_data->args[1] = coords;
                        image_fetch_rsrc(bld_base, &memory, true, target,
@@ -3689,7 +3701,7 @@ static void store_fetch_args(
                        emit_data->args[3] = lp_build_const_int32(gallivm, 15); /* dmask */
                        emit_data->arg_count = 4;
 
-                       image_append_args(ctx, emit_data, target, false);
+                       image_append_args(ctx, emit_data, target, false, force_glc);
                }
        }
 }
@@ -3873,7 +3885,7 @@ static void atomic_fetch_args(
                offset = LLVMBuildBitCast(builder, tmp, bld_base->uint_bld.elem_type, "");
 
                buffer_append_args(ctx, emit_data, rsrc, bld_base->uint_bld.zero,
-                                  offset, true);
+                                  offset, true, false);
        } else if (inst->Src[0].Register.File == TGSI_FILE_IMAGE) {
                unsigned target = inst->Memory.Texture;
                LLVMValueRef coords;
@@ -3883,12 +3895,12 @@ static void atomic_fetch_args(
 
                if (target == TGSI_TEXTURE_BUFFER) {
                        buffer_append_args(ctx, emit_data, rsrc, coords,
-                                          bld_base->uint_bld.zero, true);
+                                          bld_base->uint_bld.zero, true, false);
                } else {
                        emit_data->args[emit_data->arg_count++] = coords;
                        emit_data->args[emit_data->arg_count++] = rsrc;
 
-                       image_append_args(ctx, emit_data, target, true);
+                       image_append_args(ctx, emit_data, target, true, false);
                }
        }
 }