aco: implement nir_intrinsic_global_atomic_* on GFX6
author: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Fri, 17 Jan 2020 15:11:55 +0000 (16:11 +0100)
committer: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Thu, 23 Jan 2020 13:40:30 +0000 (14:40 +0100)
GFX6 doesn't have FLAT instructions, so use MUBUF instructions instead.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3477>

src/amd/compiler/aco_instruction_selection.cpp

index 235cac4e30e6a0e26e3fd855af4a93c0473d4e3b..d87f718eb3edca9e6209b82f7adf42abea10fc89 100644 (file)
@@ -5012,75 +5012,149 @@ void visit_global_atomic(isel_context *ctx, nir_intrinsic_instr *instr)
    }
 
    Builder bld(ctx->program, ctx->block);
-   Temp addr = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[0].ssa));
+   Temp addr = get_ssa_temp(ctx, instr->src[0].ssa);
    Temp data = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[1].ssa));
 
+   if (ctx->options->chip_class >= GFX7)
+      addr = as_vgpr(ctx, addr);
+
    if (instr->intrinsic == nir_intrinsic_global_atomic_comp_swap)
       data = bld.pseudo(aco_opcode::p_create_vector, bld.def(RegType::vgpr, data.size() * 2),
                         get_ssa_temp(ctx, instr->src[2].ssa), data);
 
    Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
 
-   bool global = ctx->options->chip_class >= GFX9;
    aco_opcode op32, op64;
-   switch (instr->intrinsic) {
-      case nir_intrinsic_global_atomic_add:
-         op32 = global ? aco_opcode::global_atomic_add : aco_opcode::flat_atomic_add;
-         op64 = global ? aco_opcode::global_atomic_add_x2 : aco_opcode::flat_atomic_add_x2;
-         break;
-      case nir_intrinsic_global_atomic_imin:
-         op32 = global ? aco_opcode::global_atomic_smin : aco_opcode::flat_atomic_smin;
-         op64 = global ? aco_opcode::global_atomic_smin_x2 : aco_opcode::flat_atomic_smin_x2;
-         break;
-      case nir_intrinsic_global_atomic_umin:
-         op32 = global ? aco_opcode::global_atomic_umin : aco_opcode::flat_atomic_umin;
-         op64 = global ? aco_opcode::global_atomic_umin_x2 : aco_opcode::flat_atomic_umin_x2;
-         break;
-      case nir_intrinsic_global_atomic_imax:
-         op32 = global ? aco_opcode::global_atomic_smax : aco_opcode::flat_atomic_smax;
-         op64 = global ? aco_opcode::global_atomic_smax_x2 : aco_opcode::flat_atomic_smax_x2;
-         break;
-      case nir_intrinsic_global_atomic_umax:
-         op32 = global ? aco_opcode::global_atomic_umax : aco_opcode::flat_atomic_umax;
-         op64 = global ? aco_opcode::global_atomic_umax_x2 : aco_opcode::flat_atomic_umax_x2;
-         break;
-      case nir_intrinsic_global_atomic_and:
-         op32 = global ? aco_opcode::global_atomic_and : aco_opcode::flat_atomic_and;
-         op64 = global ? aco_opcode::global_atomic_and_x2 : aco_opcode::flat_atomic_and_x2;
-         break;
-      case nir_intrinsic_global_atomic_or:
-         op32 = global ? aco_opcode::global_atomic_or : aco_opcode::flat_atomic_or;
-         op64 = global ? aco_opcode::global_atomic_or_x2 : aco_opcode::flat_atomic_or_x2;
-         break;
-      case nir_intrinsic_global_atomic_xor:
-         op32 = global ? aco_opcode::global_atomic_xor : aco_opcode::flat_atomic_xor;
-         op64 = global ? aco_opcode::global_atomic_xor_x2 : aco_opcode::flat_atomic_xor_x2;
-         break;
-      case nir_intrinsic_global_atomic_exchange:
-         op32 = global ? aco_opcode::global_atomic_swap : aco_opcode::flat_atomic_swap;
-         op64 = global ? aco_opcode::global_atomic_swap_x2 : aco_opcode::flat_atomic_swap_x2;
-         break;
-      case nir_intrinsic_global_atomic_comp_swap:
-         op32 = global ? aco_opcode::global_atomic_cmpswap : aco_opcode::flat_atomic_cmpswap;
-         op64 = global ? aco_opcode::global_atomic_cmpswap_x2 : aco_opcode::flat_atomic_cmpswap_x2;
-         break;
-      default:
-         unreachable("visit_atomic_global should only be called with nir_intrinsic_global_atomic_* instructions.");
+
+   if (ctx->options->chip_class >= GFX7) {
+      bool global = ctx->options->chip_class >= GFX9;
+      switch (instr->intrinsic) {
+         case nir_intrinsic_global_atomic_add:
+            op32 = global ? aco_opcode::global_atomic_add : aco_opcode::flat_atomic_add;
+            op64 = global ? aco_opcode::global_atomic_add_x2 : aco_opcode::flat_atomic_add_x2;
+            break;
+         case nir_intrinsic_global_atomic_imin:
+            op32 = global ? aco_opcode::global_atomic_smin : aco_opcode::flat_atomic_smin;
+            op64 = global ? aco_opcode::global_atomic_smin_x2 : aco_opcode::flat_atomic_smin_x2;
+            break;
+         case nir_intrinsic_global_atomic_umin:
+            op32 = global ? aco_opcode::global_atomic_umin : aco_opcode::flat_atomic_umin;
+            op64 = global ? aco_opcode::global_atomic_umin_x2 : aco_opcode::flat_atomic_umin_x2;
+            break;
+         case nir_intrinsic_global_atomic_imax:
+            op32 = global ? aco_opcode::global_atomic_smax : aco_opcode::flat_atomic_smax;
+            op64 = global ? aco_opcode::global_atomic_smax_x2 : aco_opcode::flat_atomic_smax_x2;
+            break;
+         case nir_intrinsic_global_atomic_umax:
+            op32 = global ? aco_opcode::global_atomic_umax : aco_opcode::flat_atomic_umax;
+            op64 = global ? aco_opcode::global_atomic_umax_x2 : aco_opcode::flat_atomic_umax_x2;
+            break;
+         case nir_intrinsic_global_atomic_and:
+            op32 = global ? aco_opcode::global_atomic_and : aco_opcode::flat_atomic_and;
+            op64 = global ? aco_opcode::global_atomic_and_x2 : aco_opcode::flat_atomic_and_x2;
+            break;
+         case nir_intrinsic_global_atomic_or:
+            op32 = global ? aco_opcode::global_atomic_or : aco_opcode::flat_atomic_or;
+            op64 = global ? aco_opcode::global_atomic_or_x2 : aco_opcode::flat_atomic_or_x2;
+            break;
+         case nir_intrinsic_global_atomic_xor:
+            op32 = global ? aco_opcode::global_atomic_xor : aco_opcode::flat_atomic_xor;
+            op64 = global ? aco_opcode::global_atomic_xor_x2 : aco_opcode::flat_atomic_xor_x2;
+            break;
+         case nir_intrinsic_global_atomic_exchange:
+            op32 = global ? aco_opcode::global_atomic_swap : aco_opcode::flat_atomic_swap;
+            op64 = global ? aco_opcode::global_atomic_swap_x2 : aco_opcode::flat_atomic_swap_x2;
+            break;
+         case nir_intrinsic_global_atomic_comp_swap:
+            op32 = global ? aco_opcode::global_atomic_cmpswap : aco_opcode::flat_atomic_cmpswap;
+            op64 = global ? aco_opcode::global_atomic_cmpswap_x2 : aco_opcode::flat_atomic_cmpswap_x2;
+            break;
+         default:
+            unreachable("visit_atomic_global should only be called with nir_intrinsic_global_atomic_* instructions.");
+      }
+
+      aco_opcode op = instr->dest.ssa.bit_size == 32 ? op32 : op64;
+      aco_ptr<FLAT_instruction> flat{create_instruction<FLAT_instruction>(op, global ? Format::GLOBAL : Format::FLAT, 3, return_previous ? 1 : 0)};
+      flat->operands[0] = Operand(addr);
+      flat->operands[1] = Operand(s1);
+      flat->operands[2] = Operand(data);
+      if (return_previous)
+         flat->definitions[0] = Definition(dst);
+      flat->glc = return_previous;
+      flat->dlc = false; /* Not needed for atomics */
+      flat->offset = 0;
+      flat->disable_wqm = true;
+      flat->barrier = barrier_buffer;
+      ctx->program->needs_exact = true;
+      ctx->block->instructions.emplace_back(std::move(flat));
+   } else {
+      assert(ctx->options->chip_class == GFX6);
+
+      switch (instr->intrinsic) {
+         case nir_intrinsic_global_atomic_add:
+            op32 = aco_opcode::buffer_atomic_add;
+            op64 = aco_opcode::buffer_atomic_add_x2;
+            break;
+         case nir_intrinsic_global_atomic_imin:
+            op32 = aco_opcode::buffer_atomic_smin;
+            op64 = aco_opcode::buffer_atomic_smin_x2;
+            break;
+         case nir_intrinsic_global_atomic_umin:
+            op32 = aco_opcode::buffer_atomic_umin;
+            op64 = aco_opcode::buffer_atomic_umin_x2;
+            break;
+         case nir_intrinsic_global_atomic_imax:
+            op32 = aco_opcode::buffer_atomic_smax;
+            op64 = aco_opcode::buffer_atomic_smax_x2;
+            break;
+         case nir_intrinsic_global_atomic_umax:
+            op32 = aco_opcode::buffer_atomic_umax;
+            op64 = aco_opcode::buffer_atomic_umax_x2;
+            break;
+         case nir_intrinsic_global_atomic_and:
+            op32 = aco_opcode::buffer_atomic_and;
+            op64 = aco_opcode::buffer_atomic_and_x2;
+            break;
+         case nir_intrinsic_global_atomic_or:
+            op32 = aco_opcode::buffer_atomic_or;
+            op64 = aco_opcode::buffer_atomic_or_x2;
+            break;
+         case nir_intrinsic_global_atomic_xor:
+            op32 = aco_opcode::buffer_atomic_xor;
+            op64 = aco_opcode::buffer_atomic_xor_x2;
+            break;
+         case nir_intrinsic_global_atomic_exchange:
+            op32 = aco_opcode::buffer_atomic_swap;
+            op64 = aco_opcode::buffer_atomic_swap_x2;
+            break;
+         case nir_intrinsic_global_atomic_comp_swap:
+            op32 = aco_opcode::buffer_atomic_cmpswap;
+            op64 = aco_opcode::buffer_atomic_cmpswap_x2;
+            break;
+         default:
+            unreachable("visit_atomic_global should only be called with nir_intrinsic_global_atomic_* instructions.");
+      }
+
+      Temp rsrc = get_gfx6_global_rsrc(bld, addr);
+
+      aco_opcode op = instr->dest.ssa.bit_size == 32 ? op32 : op64;
+
+      aco_ptr<MUBUF_instruction> mubuf{create_instruction<MUBUF_instruction>(op, Format::MUBUF, 4, return_previous ? 1 : 0)};
+      mubuf->operands[0] = addr.type() == RegType::vgpr ? Operand(addr) : Operand(v1);
+      mubuf->operands[1] = Operand(rsrc);
+      mubuf->operands[2] = Operand(0u);
+      mubuf->operands[3] = Operand(data);
+      if (return_previous)
+         mubuf->definitions[0] = Definition(dst);
+      mubuf->glc = return_previous;
+      mubuf->dlc = false;
+      mubuf->offset = 0;
+      mubuf->addr64 = addr.type() == RegType::vgpr;
+      mubuf->disable_wqm = true;
+      mubuf->barrier = barrier_buffer;
+      ctx->program->needs_exact = true;
+      ctx->block->instructions.emplace_back(std::move(mubuf));
    }
-   aco_opcode op = instr->dest.ssa.bit_size == 32 ? op32 : op64;
-   aco_ptr<FLAT_instruction> flat{create_instruction<FLAT_instruction>(op, global ? Format::GLOBAL : Format::FLAT, 3, return_previous ? 1 : 0)};
-   flat->operands[0] = Operand(addr);
-   flat->operands[1] = Operand(s1);
-   flat->operands[2] = Operand(data);
-   if (return_previous)
-      flat->definitions[0] = Definition(dst);
-   flat->glc = return_previous;
-   flat->dlc = false; /* Not needed for atomics */
-   flat->offset = 0;
-   flat->disable_wqm = true;
-   flat->barrier = barrier_buffer;
-   ctx->program->needs_exact = true;
-   ctx->block->instructions.emplace_back(std::move(flat));
 }
 
 void emit_memory_barrier(isel_context *ctx, nir_intrinsic_instr *instr) {