aco: refactor store_vmem_mubuf() to use new helpers
author Rhys Perry <pendingchaos02@gmail.com>
Thu, 16 Apr 2020 18:20:26 +0000 (19:20 +0100)
committer Marge Bot <eric+marge@anholt.net>
Fri, 24 Apr 2020 18:52:54 +0000 (18:52 +0000)
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4639>

src/amd/compiler/aco_instruction_selection.cpp

index 1c2b17223061f66a033397449dddead0ada9d795..931af4dd1774b9d104c0e05986a83a623b04e9bf 100644 (file)
@@ -3606,37 +3606,6 @@ Temp load_lds(isel_context *ctx, unsigned elem_size_bytes, Temp dst,
    return dst;
 }
 
-Temp extract_subvector(isel_context *ctx, Temp data, unsigned start, unsigned size, RegType type)
-{
-   if (start == 0 && size == data.size())
-      return type == RegType::vgpr ? as_vgpr(ctx, data) : data;
-
-   unsigned size_hint = 1;
-   auto it = ctx->allocated_vec.find(data.id());
-   if (it != ctx->allocated_vec.end())
-      size_hint = it->second[0].size();
-   if (size % size_hint || start % size_hint)
-      size_hint = 1;
-
-   start /= size_hint;
-   size /= size_hint;
-
-   Temp elems[size];
-   for (unsigned i = 0; i < size; i++)
-      elems[i] = emit_extract_vector(ctx, data, start + i, RegClass(type, size_hint));
-
-   if (size == 1)
-      return type == RegType::vgpr ? as_vgpr(ctx, elems[0]) : elems[0];
-
-   aco_ptr<Pseudo_instruction> vec{create_instruction<Pseudo_instruction>(aco_opcode::p_create_vector, Format::PSEUDO, size, 1)};
-   for (unsigned i = 0; i < size; i++)
-      vec->operands[i] = Operand(elems[i]);
-   Temp res = {ctx->program->allocateId(), RegClass(type, size * size_hint)};
-   vec->definitions[0] = Definition(res);
-   ctx->block->instructions.emplace_back(std::move(vec));
-   return res;
-}
-
 void split_store_data(isel_context *ctx, RegType dst_type, unsigned count, Temp *dst, unsigned *offsets, Temp src)
 {
    if (!count)
@@ -3990,35 +3959,17 @@ void store_vmem_mubuf(isel_context *ctx, Temp src, Temp descriptor, Temp voffset
    Builder bld(ctx->program, ctx->block);
    assert(elem_size_bytes == 4 || elem_size_bytes == 8);
    assert(write_mask);
+   write_mask = widen_mask(write_mask, elem_size_bytes);
 
-   if (elem_size_bytes == 8) {
-      elem_size_bytes = 4;
-      write_mask = widen_mask(write_mask, 2);
-   }
-
-   while (write_mask) {
-      int start = 0;
-      int count = 0;
-      u_bit_scan_consecutive_range(&write_mask, &start, &count);
-      assert(count > 0);
-      assert(start >= 0);
-
-      while (count > 0) {
-         unsigned sub_count = allow_combining ? MIN2(count, 4) : 1;
-         unsigned const_offset = (unsigned) start * elem_size_bytes + base_const_offset;
-
-         /* GFX6 doesn't have buffer_store_dwordx3, so make sure not to emit that here either. */
-         if (unlikely(ctx->program->chip_class == GFX6 && sub_count == 3))
-            sub_count = 2;
-
-         Temp elem = extract_subvector(ctx, src, start, sub_count, RegType::vgpr);
-         emit_single_mubuf_store(ctx, descriptor, voffset, soffset, elem, const_offset, reorder, slc);
-
-         count -= sub_count;
-         start += sub_count;
-      }
+   unsigned write_count = 0;
+   Temp write_datas[32];
+   unsigned offsets[32];
+   split_buffer_store(ctx, NULL, false, RegType::vgpr, src, write_mask,
+                      allow_combining ? 16 : 4, &write_count, write_datas, offsets);
 
-      assert(count == 0);
+   for (unsigned i = 0; i < write_count; i++) {
+      unsigned const_offset = offsets[i] + base_const_offset;
+      emit_single_mubuf_store(ctx, descriptor, voffset, soffset, write_datas[i], const_offset, reorder, slc);
    }
 }