r600/sfn: Add imageio support
author Gert Wollny <gert.wollny@collabora.com>
Wed, 6 May 2020 22:08:15 +0000 (00:08 +0200)
committer Marge Bot <eric+marge@anholt.net>
Fri, 19 Jun 2020 06:58:07 +0000 (06:58 +0000)
Signed-off-by: Gert Wollny <gert.wollny@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5206>

src/gallium/drivers/r600/sfn/sfn_emitssboinstruction.cpp
src/gallium/drivers/r600/sfn/sfn_emitssboinstruction.h
src/gallium/drivers/r600/sfn/sfn_shader_base.cpp

index d4879138a85ef5f75f0bf20a5ceccb29e0ccf8ee..f8a843bc61be503dc3ea9398d69813d4cfa2f576 100644 (file)
@@ -4,9 +4,40 @@
 #include "sfn_instruction_gds.h"
 #include "sfn_instruction_misc.h"
 #include "../r600_pipe.h"
+#include "../r600_asm.h"
 
 namespace r600 {
 
+EmitSSBOInstruction::EmitSSBOInstruction(ShaderFromNirProcessor& processor): // replaces the constructor formerly inherited from EmitInstruction
+   EmitInstruction(processor), // the processor is handed on to the base class unchanged
+   m_require_rat_return_address(false) // no RAT return address is requested until set_require_rat_return_address() is called
+{
+}
+
+/* Record that the shader needs the RAT return address registers, so that
+ * the next call to load_rat_return_address() emits the code that fills
+ * m_rat_return_address. */
+void EmitSSBOInstruction::set_require_rat_return_address()
+{
+   m_require_rat_return_address = true;
+}
+/* Emit the ALU code that fills m_rat_return_address, if it was requested.
+ *
+ * Nothing is emitted unless set_require_rat_return_address() was called
+ * before. Otherwise a temporary vec4 is allocated and written as follows:
+ *   channel 0 = MBCNT_32LO_ACCUM_PREV_INT(-1)
+ *   channel 1 = MBCNT_32HI_INT(-1)
+ *   channel 2 = SE_ID * 256 + HW_WAVE_ID      (MULADD_UINT24)
+ *   channel 1 = channel 2 * 0x40 + channel 0  (MULADD_UINT24)
+ * The request flag is cleared afterwards, so repeated calls emit the
+ * sequence only once per request.
+ *
+ * NOTE(review): presumably this yields a per-invocation slot index for the
+ * RAT return buffer that the later vc_fetch reads back - confirm against
+ * the hardware documentation.
+ *
+ * Returns true in all cases.
+ */
+bool
+EmitSSBOInstruction::load_rat_return_address()
+{
+   if (m_require_rat_return_address) {
+      m_rat_return_address = get_temp_vec4();
+      emit_instruction(new AluInstruction(op1_mbcnt_32lo_accum_prev_int, m_rat_return_address.reg_i(0), literal(-1), {alu_write}));
+      emit_instruction(new AluInstruction(op1_mbcnt_32hi_int, m_rat_return_address.reg_i(1), literal(-1), {alu_write})); // channel 1 is overwritten by the last MULADD below
+      emit_instruction(new AluInstruction(op3_muladd_uint24, m_rat_return_address.reg_i(2), PValue(new InlineConstValue(ALU_SRC_SE_ID, 0)),
+                                          literal(256), PValue(new InlineConstValue(ALU_SRC_HW_WAVE_ID, 0)), {alu_write, alu_last_instr})); // first instruction carrying alu_last_instr - presumably ends the ALU group opened above
+      emit_instruction(new AluInstruction(op3_muladd_uint24, m_rat_return_address.reg_i(1),
+                                          m_rat_return_address.reg_i(2), literal(0x40), m_rat_return_address.reg_i(0),
+      {alu_write, alu_last_instr}));
+      m_require_rat_return_address = false; // request served, do not emit the sequence again
+   }
+   return true;
+}
+
+
 bool EmitSSBOInstruction::do_emit(nir_instr* instr)
 {
    const nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
@@ -29,8 +60,24 @@ bool EmitSSBOInstruction::do_emit(nir_instr* instr)
       return emit_atomic_pre_dec(intr);
    case nir_intrinsic_load_ssbo:
        return emit_load_ssbo(intr);
-    case nir_intrinsic_store_ssbo:
+   case nir_intrinsic_store_ssbo:
       return emit_store_ssbo(intr);
+   case nir_intrinsic_ssbo_atomic_add:
+      return emit_ssbo_atomic_op(intr);
+   case nir_intrinsic_image_store:
+      return emit_image_store(intr);
+   case nir_intrinsic_image_load:
+   case nir_intrinsic_image_atomic_add:
+   case nir_intrinsic_image_atomic_and:
+   case nir_intrinsic_image_atomic_or:
+   case nir_intrinsic_image_atomic_xor:
+   case nir_intrinsic_image_atomic_exchange:
+   case nir_intrinsic_image_atomic_comp_swap:
+   case nir_intrinsic_image_atomic_umin:
+   case nir_intrinsic_image_atomic_umax:
+   case nir_intrinsic_image_atomic_imin:
+   case nir_intrinsic_image_atomic_imax:
+      return emit_image_load(intr);
    default:
       return false;
    }
@@ -111,6 +158,48 @@ ESDOp EmitSSBOInstruction::get_opcode(const nir_intrinsic_op opcode)
    }
 }
 
+RatInstruction::ERatOp
+EmitSSBOInstruction::get_rat_opcode(const nir_intrinsic_op opcode, pipe_format format) const
+{
+   switch (opcode) {
+   case nir_intrinsic_ssbo_atomic_add:
+   case nir_intrinsic_image_atomic_add:
+      return RatInstruction::ADD_RTN;
+   case nir_intrinsic_ssbo_atomic_and:
+   case nir_intrinsic_image_atomic_and:
+      return RatInstruction::AND_RTN;
+   case nir_intrinsic_ssbo_atomic_exchange:
+   case nir_intrinsic_image_atomic_exchange:
+      return RatInstruction::XCHG_RTN;
+   case nir_intrinsic_ssbo_atomic_or:
+   case nir_intrinsic_image_atomic_or:
+      return RatInstruction::OR_RTN;
+   case nir_intrinsic_ssbo_atomic_imin:
+   case nir_intrinsic_image_atomic_imin:
+      return RatInstruction::MIN_INT_RTN;
+   case nir_intrinsic_ssbo_atomic_imax:
+   case nir_intrinsic_image_atomic_imax:
+      return RatInstruction::MAX_INT_RTN;
+   case nir_intrinsic_ssbo_atomic_umin:
+   case nir_intrinsic_image_atomic_umin:
+      return RatInstruction::MIN_UINT_RTN;
+   case nir_intrinsic_ssbo_atomic_umax:
+   case nir_intrinsic_image_atomic_umax:
+      return RatInstruction::MAX_UINT_RTN;
+   case nir_intrinsic_image_atomic_xor:
+      return RatInstruction::XOR_RTN;
+   case nir_intrinsic_image_atomic_comp_swap:
+      if (util_format_is_float(format))
+         return RatInstruction::CMPXCHG_FLT_RTN;
+      else
+         return RatInstruction::CMPXCHG_INT_RTN;
+   case nir_intrinsic_image_load:
+      return RatInstruction::NOP_RTN;
+   default:
+      unreachable("Unsupported RAT instruction");
+   }
+}
+
 
 bool EmitSSBOInstruction::emit_atomic_add(const nir_intrinsic_instr* instr)
 {
@@ -127,22 +216,19 @@ bool EmitSSBOInstruction::emit_atomic_add(const nir_intrinsic_instr* instr)
    return true;
 }
 
-bool EmitSSBOInstruction::emit_atomic_inc(const nir_intrinsic_instr* instr)
+bool EmitSSBOInstruction::load_atomic_inc_limits() // Allocates m_atomic_update and emits a MOV loading it with literal 1; always returns true.
 {
-   GPRVector dest = make_dest(instr);
+   m_atomic_update = get_temp_register(); // shared operand register, read later by emit_atomic_inc() and emit_atomic_pre_dec()
+   emit_instruction(new AluInstruction(op1_mov, m_atomic_update, literal(1), // despite the function name the loaded value is the step 1, not a limit
+   {alu_write, alu_last_instr}));
+   return true;
+}
 
+bool EmitSSBOInstruction::emit_atomic_inc(const nir_intrinsic_instr* instr)
+{
    PValue uav_id = from_nir(instr->src[0], 0);
-
-
-   if (!m_atomic_limit) {
-      int one_tmp = allocate_temp_register();
-      m_atomic_limit = PValue(new GPRValue(one_tmp, 0));
-      emit_instruction(new AluInstruction(op1_mov, m_atomic_limit,
-                       PValue(new LiteralValue(0xffffffff)),
-                       {alu_write, alu_last_instr}));
-   }
-
-   auto ir = new GDSInstr(DS_OP_INC_RET, dest, m_atomic_limit, uav_id,
+   GPRVector dest = make_dest(instr);
+   auto ir = new GDSInstr(DS_OP_ADD_RET, dest, m_atomic_update, uav_id,
                           nir_intrinsic_base(instr));
    emit_instruction(ir);
    return true;
@@ -154,18 +240,10 @@ bool EmitSSBOInstruction::emit_atomic_pre_dec(const nir_intrinsic_instr *instr)
 
    PValue uav_id = from_nir(instr->src[0], 0);
 
-   int one_tmp = allocate_temp_register();
-   PValue value(new GPRValue(one_tmp, 0));
-   emit_instruction(new AluInstruction(op1_mov, value,  Value::one_i,
-                    {alu_write, alu_last_instr}));
-
-   auto ir = new GDSInstr(DS_OP_SUB_RET, dest, value, uav_id,
+   auto ir = new GDSInstr(DS_OP_SUB_RET, dest, m_atomic_update, uav_id,
                           nir_intrinsic_base(instr));
    emit_instruction(ir);
 
-   ir = new GDSInstr(DS_OP_READ_RET, dest, uav_id, nir_intrinsic_base(instr));
-   emit_instruction(ir);
-
    return true;
 }
 
@@ -260,6 +338,169 @@ bool EmitSSBOInstruction::emit_store_ssbo(const nir_intrinsic_instr* instr)
    return true;
 }
 
+bool // Emits a typed RAT store for a NIR image_store intrinsic; always returns true.
+EmitSSBOInstruction::emit_image_store(const nir_intrinsic_instr *intrin)
+{
+   int imageid = 0;
+   PValue image_offset;
+
+   if (nir_src_is_const(intrin->src[0])) // src[0] selects the image: a constant becomes the RAT id ...
+      imageid = nir_src_as_int(intrin->src[0]);
+   else
+      image_offset = from_nir(intrin->src[0], 0); // ... any other value is handed to the RAT instruction as image_offset
+
+   auto coord =  vec_from_nir_with_fetch_constant(intrin->src[1], 0xf, {0,1,2,3}); // src[1]: all four coordinate components
+   auto undef = from_nir(intrin->src[2], 0); // NOTE(review): never read below - confirm whether from_nir() is called for a side effect or can go
+   auto value = vec_from_nir_with_fetch_constant(intrin->src[3],  0xf, {0,1,2,3}); // src[3]: the data to store
+   auto unknown  = from_nir(intrin->src[4], 0); // NOTE(review): never read below, same question as for src[2]
+
+   if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_1D &&
+       nir_intrinsic_image_array(intrin)) { // 1D array images only
+      emit_instruction(new AluInstruction(op1_mov, coord.reg_i(2), coord.reg_i(1), {alu_write})); // no alu_last_instr here, it is set on the next MOV only
+      emit_instruction(new AluInstruction(op1_mov, coord.reg_i(1), coord.reg_i(2), {alu_last_instr, alu_write})); // presumably both MOVs form one ALU group and thereby exchange channels 1 and 2 - TODO confirm
+   }
+
+   auto store = new RatInstruction(cf_mem_rat, RatInstruction::STORE_TYPED, value, coord, imageid,
+                                   image_offset, 1, 0xf, 0, false); // last argument is false here but true for the value-returning RAT ops that are followed by WaitAck
+   emit_instruction(store);
+   return true;
+}
+
+bool
+EmitSSBOInstruction::emit_ssbo_atomic_op(const nir_intrinsic_instr *intrin)
+{
+   int imageid = 0;
+   PValue image_offset;
+
+   if (nir_src_is_const(intrin->src[0]))
+      imageid = nir_src_as_int(intrin->src[0]);
+   else
+      image_offset = from_nir(intrin->src[0], 0);
+
+   auto opcode = EmitSSBOInstruction::get_rat_opcode(intrin->intrinsic, PIPE_FORMAT_R32_UINT);
+
+   auto coord =  from_nir_with_fetch_constant(intrin->src[1], 0);
+
+   emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(0), from_nir(intrin->src[2], 0), write));
+   emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(2), Value::zero, last_write));
+
+   GPRVector out_vec({coord, coord, coord, coord});
+
+   auto atomic = new RatInstruction(cf_mem_rat, opcode, m_rat_return_address, out_vec, imageid,
+                                   image_offset, 1, 0xf, 0, true);
+   emit_instruction(atomic);
+   emit_instruction(new WaitAck(0));
+
+   GPRVector dest = vec_from_nir(intrin->dest, intrin->dest.ssa.num_components);
+   auto fetch = new FetchInstruction(vc_fetch,
+                                     no_index_offset,
+                                     fmt_32,
+                                     vtx_nf_int,
+                                     vtx_es_none,
+                                     m_rat_return_address.reg_i(1),
+                                     dest,
+                                     0,
+                                     false,
+                                     0xf,
+                                     R600_IMAGE_IMMED_RESOURCE_OFFSET,
+                                     0,
+                                     bim_none,
+                                     false,
+                                     false,
+                                     0,
+                                     0,
+                                     0,
+                                     PValue(),
+                                     {0,7,7,7});
+   fetch->set_flag(vtx_srf_mode);
+   fetch->set_flag(vtx_use_tc);
+   emit_instruction(fetch);
+   return true;
+
+}
+/* Emit the RAT instruction for image_load and for all image atomics, then
+ * fetch the returned value through fetch_return_value().
+ *
+ * Operand use, as far as this function shows:
+ *   src[0]  image index: a constant becomes the RAT id, any other value is
+ *           passed on as image_offset
+ *   src[1]  coordinate, all four components
+ *   src[3]  atomic operand (atomics only)
+ *   src[4]  second operand of image_atomic_comp_swap
+ *
+ * The operands are moved into m_rat_return_address, which is expected to
+ * have been filled by load_rat_return_address() before; this is not
+ * checked here.
+ *
+ * Returns the result of fetch_return_value().
+ */
+bool
+EmitSSBOInstruction::emit_image_load(const nir_intrinsic_instr *intrin)
+{
+   int imageid = 0;
+   PValue image_offset;
+
+   if (nir_src_is_const(intrin->src[0]))
+      imageid = nir_src_as_int(intrin->src[0]);
+   else
+      image_offset = from_nir(intrin->src[0], 0);
+
+   auto rat_op = get_rat_opcode(intrin->intrinsic, nir_intrinsic_format(intrin)); // image_load yields NOP_RTN, atomics their *_RTN operation
+
+   GPRVector::Swizzle swz = {0,1,2,3};
+   auto coord =  vec_from_nir_with_fetch_constant(intrin->src[1], 0xf, swz);
+
+   if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_1D &&
+       nir_intrinsic_image_array(intrin)) { // 1D array images only, same handling as in emit_image_store()
+      emit_instruction(new AluInstruction(op1_mov, coord.reg_i(2), coord.reg_i(1), {alu_write})); // no alu_last_instr here, it is set on the next MOV only
+      emit_instruction(new AluInstruction(op1_mov, coord.reg_i(1), coord.reg_i(2), {alu_last_instr, alu_write})); // presumably both MOVs form one ALU group and thereby exchange channels 1 and 2 - TODO confirm
+   }
+
+   if (intrin->intrinsic != nir_intrinsic_image_load) { // a plain load has no operands to set up
+      if (intrin->intrinsic == nir_intrinsic_image_atomic_comp_swap) {
+         emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(0),
+                                             from_nir(intrin->src[4], 0), {alu_write})); // comp_swap: src[4] goes to channel 0 ...
+         emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(3),
+                                             from_nir(intrin->src[3], 0), {alu_last_instr, alu_write})); // ... and src[3] to channel 3
+      } else {
+         emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(0),
+                                             from_nir(intrin->src[3], 0), {alu_last_instr, alu_write})); // all other atomics: the single operand goes to channel 0
+      }
+   }
+
+   auto store = new RatInstruction(cf_mem_rat, rat_op, m_rat_return_address, coord, imageid, // named "store", but carries the load/atomic operation selected above
+                                   image_offset, 1, 0xf, 0, true);
+   emit_instruction(store);
+   return fetch_return_value(intrin);
+}
+/* Wait for the preceding RAT operation and fetch the value it returned
+ * into the destination of the intrinsic.
+ *
+ * WaitAck(0) is emitted first. The fetch parameters (data format, number
+ * format, signedness and endian swap) are obtained from the image format
+ * of the intrinsic through r600_vertex_data_type(); the fetch itself is a
+ * vc_fetch addressed by channel 1 of m_rat_return_address.
+ *
+ * NOTE(review): the fetch always uses R600_IMAGE_IMMED_RESOURCE_OFFSET,
+ * the image index is not available here - confirm that this is correct
+ * for image indices other than 0.
+ *
+ * Always returns true.
+ */
+bool EmitSSBOInstruction::fetch_return_value(const nir_intrinsic_instr *intrin)
+{
+   emit_instruction(new WaitAck(0));
+
+   pipe_format format = nir_intrinsic_format(intrin);
+   unsigned fmt = fmt_32; // initial values, handed to r600_vertex_data_type() by pointer below
+   unsigned num_format = 0;
+   unsigned format_comp = 0;
+   unsigned endian = 0;
+
+   r600_vertex_data_type(format, &fmt, &num_format, &format_comp, &endian);
+
+   GPRVector dest = vec_from_nir(intrin->dest, nir_dest_num_components(intrin->dest));
+   auto fetch = new FetchInstruction(vc_fetch,
+                                     no_index_offset,
+                                     (EVTXDataFormat)fmt,
+                                     (EVFetchNumFormat)num_format,
+                                     (EVFetchEndianSwap)endian,
+                                     m_rat_return_address.reg_i(1),
+                                     dest,
+                                     0,
+                                     false,
+                                     0x3, // NOTE(review): emit_ssbo_atomic_op() passes 0xf in this position - confirm that the difference is intended
+                                     R600_IMAGE_IMMED_RESOURCE_OFFSET,
+                                     0,
+                                     bim_none,
+                                     false,
+                                     false,
+                                     0,
+                                     0,
+                                     0,
+                                     PValue(),
+                                     {0,1,2,3}); // all four destination channels, unlike the {0,7,7,7} used for SSBO atomics
+   fetch->set_flag(vtx_srf_mode);
+   fetch->set_flag(vtx_use_tc);
+   if (format_comp) // non-zero format_comp requests signed component handling
+      fetch->set_flag(vtx_format_comp_signed);
+
+   emit_instruction(fetch);
+   return true;
+}
+
 GPRVector EmitSSBOInstruction::make_dest(const nir_intrinsic_instr* ir)
 {
    GPRVector::Values v;
index f2951c9b3fb6dbdecd84fa25dcd2498419f3ebe4..699075d744bf0c44bb0ddc14a77cb444505dec67 100644 (file)
@@ -2,12 +2,18 @@
 #define SFN_EMITSSBOINSTRUCTION_H
 
 #include "sfn_emitinstruction.h"
+#include "sfn_instruction_gds.h"
 
 namespace r600 {
 
 class EmitSSBOInstruction: public EmitInstruction {
 public:
-   using EmitInstruction::EmitInstruction;
+   EmitSSBOInstruction(ShaderFromNirProcessor& processor);
+
+   void set_require_rat_return_address();
+   bool load_rat_return_address();
+   bool load_atomic_inc_limits();
+
 private:
    bool do_emit(nir_instr *instr);
 
@@ -19,11 +25,22 @@ private:
 
    bool emit_load_ssbo(const nir_intrinsic_instr* instr);
    bool emit_store_ssbo(const nir_intrinsic_instr* instr);
+
+   bool emit_image_load(const nir_intrinsic_instr *intrin);
+   bool emit_image_store(const nir_intrinsic_instr *intrin);
+   bool emit_ssbo_atomic_op(const nir_intrinsic_instr *intrin);
+
+   bool fetch_return_value(const nir_intrinsic_instr *intrin);
+
    ESDOp get_opcode(nir_intrinsic_op opcode);
+   RatInstruction::ERatOp get_rat_opcode(const nir_intrinsic_op opcode, pipe_format format) const;
 
    GPRVector make_dest(const nir_intrinsic_instr* instr);
 
-   PValue m_atomic_limit;
+   PValue m_atomic_update;
+
+   bool m_require_rat_return_address;
+   GPRVector m_rat_return_address;
 };
 
 }
index be869921fe1458eca9fdb3421a6d77d1da3623f5..92d65c574bf9ef59004f0a79a9d758710888366d 100644 (file)
@@ -93,6 +93,36 @@ bool ShaderFromNirProcessor::scan_instruction(nir_instr *instr)
       nir_tex_instr *t = nir_instr_as_tex(instr);
       if (t->sampler_dim == GLSL_SAMPLER_DIM_BUF)
          sh_info().uses_tex_buffers = true;
+      break;
+   }
+   case nir_instr_type_intrinsic: {
+      auto *i = nir_instr_as_intrinsic(instr);
+      switch (i->intrinsic) {
+      case nir_intrinsic_image_load:
+      case nir_intrinsic_ssbo_atomic_add:
+      case nir_intrinsic_image_atomic_add:
+      case nir_intrinsic_ssbo_atomic_and:
+      case nir_intrinsic_image_atomic_and:
+      case nir_intrinsic_ssbo_atomic_or:
+      case nir_intrinsic_image_atomic_or:
+      case nir_intrinsic_ssbo_atomic_imin:
+      case nir_intrinsic_image_atomic_imin:
+      case nir_intrinsic_ssbo_atomic_imax:
+      case nir_intrinsic_image_atomic_imax:
+      case nir_intrinsic_ssbo_atomic_umin:
+      case nir_intrinsic_image_atomic_umin:
+      case nir_intrinsic_ssbo_atomic_umax:
+      case nir_intrinsic_image_atomic_umax:
+      case nir_intrinsic_image_atomic_xor:
+      case nir_intrinsic_image_atomic_exchange:
+      case nir_intrinsic_image_atomic_comp_swap:
+         m_ssbo_instr.set_require_rat_return_address();
+         m_sel.info.writes_memory = 1;
+         break;
+      default:
+         ;
+      }
+
    }
    default:
       ;
@@ -490,6 +520,11 @@ bool ShaderFromNirProcessor::emit_intrinsic_instruction(nir_intrinsic_instr* ins
    if (emit_intrinsic_instruction_override(instr))
       return true;
 
+   if (m_ssbo_instr.emit(&instr->instr)) {
+      m_sel.info.writes_memory = true;
+      return true;
+   }
+
    switch (instr->intrinsic) {
    case nir_intrinsic_load_deref: {
       auto var = get_deref_location(instr->src[0]);
@@ -524,39 +559,24 @@ bool ShaderFromNirProcessor::emit_intrinsic_instruction(nir_intrinsic_instr* ins
       return emit_discard_if(instr);
    case nir_intrinsic_load_ubo_r600:
       return emit_load_ubo(instr);
-   case nir_intrinsic_atomic_counter_add:
-   case nir_intrinsic_atomic_counter_and:
-   case nir_intrinsic_atomic_counter_exchange:
-   case nir_intrinsic_atomic_counter_max:
-   case nir_intrinsic_atomic_counter_min:
-   case nir_intrinsic_atomic_counter_or:
-   case nir_intrinsic_atomic_counter_xor:
-   case nir_intrinsic_atomic_counter_comp_swap:
-   case nir_intrinsic_atomic_counter_read:
-   case nir_intrinsic_atomic_counter_post_dec:
-   case nir_intrinsic_atomic_counter_inc:
-   case nir_intrinsic_atomic_counter_pre_dec:
-   case nir_intrinsic_store_ssbo:
-      m_sel.info.writes_memory = true;
-      /* fallthrough */
-   case nir_intrinsic_load_ssbo:
-      return m_ssbo_instr.emit(&instr->instr);
-      break;
-   case nir_intrinsic_copy_deref:
-   case nir_intrinsic_load_constant:
-   case nir_intrinsic_load_input:
-   case nir_intrinsic_store_output:
    case nir_intrinsic_load_tcs_in_param_base_r600:
       return emit_load_tcs_param_base(instr, 0);
    case nir_intrinsic_load_tcs_out_param_base_r600:
       return emit_load_tcs_param_base(instr, 16);
    case nir_intrinsic_load_local_shared_r600:
+   case nir_intrinsic_load_shared:
       return emit_load_local_shared(instr);
    case nir_intrinsic_store_local_shared_r600:
+   case nir_intrinsic_store_shared:
       return emit_store_local_shared(instr);
    case nir_intrinsic_control_barrier:
    case nir_intrinsic_memory_barrier_tcs_patch:
+   case nir_intrinsic_memory_barrier_shared:
       return emit_barrier(instr);
+   case nir_intrinsic_copy_deref:
+   case nir_intrinsic_load_constant:
+   case nir_intrinsic_load_input:
+   case nir_intrinsic_store_output:
 
    default:
       fprintf(stderr, "r600-nir: Unsupported intrinsic %d\n", instr->intrinsic);