aco: refactor visit_store_scratch() to use new helpers
authorRhys Perry <pendingchaos02@gmail.com>
Thu, 16 Apr 2020 18:27:13 +0000 (19:27 +0100)
committerMarge Bot <eric+marge@anholt.net>
Fri, 24 Apr 2020 18:52:54 +0000 (18:52 +0000)
Should support 8/16-bit stores now

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4639>

src/amd/compiler/aco_instruction_selection.cpp

index 8993f2c1a10de732c1b7a01c0c393aa99763c491..47d3f015c2ac4297b21eb8a93c80e1017e6e3482 100644 (file)
@@ -6798,46 +6798,29 @@ void visit_load_scratch(isel_context *ctx, nir_intrinsic_instr *instr) {
 }
 
 void visit_store_scratch(isel_context *ctx, nir_intrinsic_instr *instr) {
-   assert(instr->src[0].ssa->bit_size == 32 || instr->src[0].ssa->bit_size == 64);
    Builder bld(ctx->program, ctx->block);
    Temp rsrc = get_scratch_resource(ctx);
    Temp data = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[0].ssa));
    Temp offset = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[1].ssa));
 
    unsigned elem_size_bytes = instr->src[0].ssa->bit_size / 8;
-   unsigned writemask = nir_intrinsic_write_mask(instr);
-
-   while (writemask) {
-      int start, count;
-      u_bit_scan_consecutive_range(&writemask, &start, &count);
-      int num_bytes = count * elem_size_bytes;
-
-      if (num_bytes > 16) {
-         assert(elem_size_bytes == 8);
-         writemask |= (((count - 2) << 1) - 1) << (start + 2);
-         count = 2;
-         num_bytes = 16;
-      }
-
-      // TODO: check alignment of sub-dword stores
-      // TODO: split 3 bytes. there is no store instruction for that
+   unsigned writemask = widen_mask(nir_intrinsic_write_mask(instr), elem_size_bytes);
 
-      Temp write_data;
-      if (count != instr->num_components) {
-         aco_ptr<Pseudo_instruction> vec{create_instruction<Pseudo_instruction>(aco_opcode::p_create_vector, Format::PSEUDO, count, 1)};
-         for (int i = 0; i < count; i++) {
-            Temp elem = emit_extract_vector(ctx, data, start + i, RegClass(RegType::vgpr, elem_size_bytes / 4));
-            vec->operands[i] = Operand(elem);
-         }
-         write_data = bld.tmp(RegClass(RegType::vgpr, count * elem_size_bytes / 4));
-         vec->definitions[0] = Definition(write_data);
-         ctx->block->instructions.emplace_back(std::move(vec));
-      } else {
-         write_data = data;
-      }
+   unsigned write_count = 0;
+   Temp write_datas[32];
+   unsigned offsets[32];
+   split_buffer_store(ctx, instr, false, RegType::vgpr, data, writemask,
+                      16, &write_count, write_datas, offsets);
 
+   for (unsigned i = 0; i < write_count; i++) {
       aco_opcode op;
-      switch (num_bytes) {
+      switch (write_datas[i].bytes()) {
+         case 1:
+            op = aco_opcode::buffer_store_byte;
+            break;
+         case 2:
+            op = aco_opcode::buffer_store_short;
+            break;
          case 4:
             op = aco_opcode::buffer_store_dword;
             break;
@@ -6854,7 +6837,7 @@ void visit_store_scratch(isel_context *ctx, nir_intrinsic_instr *instr) {
             unreachable("Invalid data size for nir_intrinsic_store_scratch.");
       }
 
-      bld.mubuf(op, rsrc, offset, ctx->program->scratch_offset, write_data, start * elem_size_bytes, true);
+      bld.mubuf(op, rsrc, offset, ctx->program->scratch_offset, write_datas[i], offsets[i], true);
    }
 }