aco: be more careful about using SMEM for load_global
author: Rhys Perry <pendingchaos02@gmail.com>
Mon, 6 Apr 2020 19:15:36 +0000 (20:15 +0100)
committer: Marge Bot <eric+marge@anholt.net>
Fri, 24 Apr 2020 18:52:54 +0000 (18:52 +0000)
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4639>

src/amd/compiler/aco_instruction_selection.cpp

index c83cf00f01a86c29b01a783ba9ca06e804a8c4b9..c86d69baae9ee0ca5466821d676612bb07af6c33 100644 (file)
@@ -6082,12 +6082,14 @@ void visit_load_global(isel_context *ctx, nir_intrinsic_instr *instr)
 
    bool glc = nir_intrinsic_access(instr) & (ACCESS_VOLATILE | ACCESS_COHERENT);
    bool dlc = glc && ctx->options->chip_class >= GFX10;
+   /* VMEM stores don't update the SMEM cache and it's difficult to prove that
+    * it's safe to use SMEM */
+   bool can_use_smem = nir_intrinsic_access(instr) & ACCESS_NON_WRITEABLE;
    aco_opcode op;
-   if (dst.type() == RegType::vgpr || (glc && ctx->options->chip_class < GFX8)) {
+   if (dst.type() == RegType::vgpr || (glc && ctx->options->chip_class < GFX8) || !can_use_smem) {
       bool global = ctx->options->chip_class >= GFX9;
 
       if (ctx->options->chip_class >= GFX7) {
-         aco_opcode op;
          switch (num_bytes) {
          case 4:
             op = global ? aco_opcode::global_load_dword : aco_opcode::flat_load_dword;
@@ -6128,7 +6130,6 @@ void visit_load_global(isel_context *ctx, nir_intrinsic_instr *instr)
          /* GFX6 doesn't support loading vec3, expand to vec4. */
          num_bytes = num_bytes == 12 ? 16 : num_bytes;
 
-         aco_opcode op;
          switch (num_bytes) {
          case 4:
             op = aco_opcode::buffer_load_dword;