intel/fs: Implement nir_intrinsic_global_atomic_*
authorJason Ekstrand <jason.ekstrand@intel.com>
Mon, 26 Nov 2018 21:15:04 +0000 (15:15 -0600)
committerJason Ekstrand <jason.ekstrand@intel.com>
Fri, 1 Feb 2019 22:11:00 +0000 (16:11 -0600)
eviewed-by: Kenneth Graunke <kenneth@whitecape.org>

src/intel/compiler/brw_disasm.c
src/intel/compiler/brw_eu.h
src/intel/compiler/brw_eu_defines.h
src/intel/compiler/brw_fs.cpp
src/intel/compiler/brw_fs.h
src/intel/compiler/brw_fs_nir.cpp
src/intel/compiler/brw_schedule_instructions.cpp
src/intel/compiler/brw_shader.cpp

index 5624bedd2e7a2d186ec8ed7b4945cf83b495c973..efca3e2ce7d8390ca5c8886c665ae7b85972a49b 100644 (file)
@@ -439,10 +439,13 @@ static const char *const dp_dc1_msg_type_hsw[32] = {
    [HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_WRITE] = "DC typed surface write",
    [GEN9_DATAPORT_DC_PORT1_A64_SCATTERED_READ] = "DC A64 scattered read",
    [GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_READ] = "DC A64 untyped surface read",
+   [GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_OP] = "DC A64 untyped atomic op",
    [GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_WRITE] = "DC A64 untyped surface write",
    [GEN8_DATAPORT_DC_PORT1_A64_SCATTERED_WRITE] = "DC A64 scattered write",
    [GEN9_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP] =
       "DC untyped atomic float op",
+   [GEN9_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_FLOAT_OP] =
+      "DC A64 untyped atomic float op",
 };
 
 static const char *const aop[16] = {
@@ -1940,6 +1943,7 @@ brw_disassemble_inst(FILE *file, const struct gen_device_info *devinfo,
                case HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2:
                case HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP_SIMD4X2:
                case HSW_DATAPORT_DC_PORT1_ATOMIC_COUNTER_OP_SIMD4X2:
+               case GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_OP:
                   control(file, "atomic op", aop, msg_ctrl & 0xf, &space);
                   break;
                case HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ:
@@ -1954,6 +1958,7 @@ brw_disassemble_inst(FILE *file, const struct gen_device_info *devinfo,
                   break;
                }
                case GEN9_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP:
+               case GEN9_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_FLOAT_OP:
                   format(file, "SIMD%d,", (msg_ctrl & (1 << 4)) ? 8 : 16);
                   control(file, "atomic float op", aop_float, msg_ctrl & 0xf,
                           &space);
index ec3aafb836335564154d57ec43e05bcb540138cf..104cbece9b35da12261ae1bf3ba2e0b63381ce0a 100644 (file)
@@ -749,6 +749,46 @@ brw_dp_a64_byte_scattered_rw_desc(const struct gen_device_info *devinfo,
    return brw_dp_desc(devinfo, BRW_BTI_STATELESS, msg_type, msg_control);
 }
 
+static inline uint32_t
+brw_dp_a64_untyped_atomic_desc(const struct gen_device_info *devinfo,
+                               unsigned exec_size, /**< 0 for SIMD4x2 */
+                               unsigned bit_size,
+                               unsigned atomic_op,
+                               bool response_expected)
+{
+   assert(exec_size == 8);
+   assert(devinfo->gen >= 8);
+   assert(bit_size == 32 || bit_size == 64);
+
+   const unsigned msg_type = GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_OP;
+
+   const unsigned msg_control =
+      SET_BITS(atomic_op, 3, 0) |
+      SET_BITS(bit_size == 64, 4, 4) |
+      SET_BITS(response_expected, 5, 5);
+
+   return brw_dp_desc(devinfo, BRW_BTI_STATELESS, msg_type, msg_control);
+}
+
+static inline uint32_t
+brw_dp_a64_untyped_atomic_float_desc(const struct gen_device_info *devinfo,
+                                     unsigned exec_size,
+                                     unsigned atomic_op,
+                                     bool response_expected)
+{
+   assert(exec_size == 8);
+   assert(devinfo->gen >= 9);
+
+   assert(exec_size > 0);
+   const unsigned msg_type = GEN9_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_FLOAT_OP;
+
+   const unsigned msg_control =
+      SET_BITS(atomic_op, 1, 0) |
+      SET_BITS(response_expected, 5, 5);
+
+   return brw_dp_desc(devinfo, BRW_BTI_STATELESS, msg_type, msg_control);
+}
+
 static inline uint32_t
 brw_dp_typed_atomic_desc(const struct gen_device_info *devinfo,
                          unsigned exec_size,
index 6c2a2f8ef7cde3784d301098f2b6ff4de6265d63..c0fee90fe5fe41d376ad1c88b7c6763fd35883c5 100644 (file)
@@ -424,6 +424,8 @@ enum opcode {
    SHADER_OPCODE_A64_UNTYPED_WRITE_LOGICAL,
    SHADER_OPCODE_A64_BYTE_SCATTERED_READ_LOGICAL,
    SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL,
+   SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL,
+   SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT_LOGICAL,
 
    SHADER_OPCODE_TYPED_ATOMIC,
    SHADER_OPCODE_TYPED_ATOMIC_LOGICAL,
@@ -1185,9 +1187,11 @@ enum brw_message_target {
 #define HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_WRITE                   13
 #define GEN9_DATAPORT_DC_PORT1_A64_SCATTERED_READ                   0x10
 #define GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_READ             0x11
+#define GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_OP                0x12
 #define GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_WRITE            0x19
 #define GEN8_DATAPORT_DC_PORT1_A64_SCATTERED_WRITE                  0x1a
 #define GEN9_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP              0x1b
+#define GEN9_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_FLOAT_OP          0x1d
 
 /* GEN9 */
 #define GEN9_DATAPORT_RC_RENDER_TARGET_WRITE                        12
index 28a6b44ea498dc949cd783202948cab8e7b7c54c..5a18ba86a961474b950182d8ae6e67912f2116ed 100644 (file)
@@ -797,6 +797,35 @@ fs_inst::components_read(unsigned i) const
       assert(src[2].file == IMM);
       return i == 1 ? src[2].ud : 1;
 
+   case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL:
+      assert(src[2].file == IMM);
+      if (i == 1) {
+         /* Data source */
+         const unsigned op = src[2].ud;
+         switch (op) {
+         case BRW_AOP_INC:
+         case BRW_AOP_DEC:
+         case BRW_AOP_PREDEC:
+            return 0;
+         case BRW_AOP_CMPWR:
+            return 2;
+         default:
+            return 1;
+         }
+      } else {
+         return 1;
+      }
+
+   case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT_LOGICAL:
+      assert(src[2].file == IMM);
+      if (i == 1) {
+         /* Data source */
+         const unsigned op = src[2].ud;
+         return op == BRW_AOP_FCMPWR ? 2 : 1;
+      } else {
+         return 1;
+      }
+
    case SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL:
       /* Scattered logical opcodes use the following params:
        * src[0] Surface coordinates
@@ -5292,6 +5321,18 @@ lower_a64_logical_send(const fs_builder &bld, fs_inst *inst)
                                                true   /* write */);
       break;
 
+   case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL:
+      desc = brw_dp_a64_untyped_atomic_desc(devinfo, inst->exec_size, 32,
+                                            arg,   /* atomic_op */
+                                            !inst->dst.is_null());
+      break;
+
+   case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT_LOGICAL:
+      desc = brw_dp_a64_untyped_atomic_float_desc(devinfo, inst->exec_size,
+                                                  arg,   /* atomic_op */
+                                                  !inst->dst.is_null());
+      break;
+
    default:
       unreachable("Unknown A64 logical instruction");
    }
@@ -5492,6 +5533,8 @@ fs_visitor::lower_logical_sends()
       case SHADER_OPCODE_A64_UNTYPED_READ_LOGICAL:
       case SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL:
       case SHADER_OPCODE_A64_BYTE_SCATTERED_READ_LOGICAL:
+      case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL:
+      case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT_LOGICAL:
          lower_a64_logical_send(ibld, inst);
          break;
 
@@ -5998,6 +6041,10 @@ get_lowered_simd_width(const struct gen_device_info *devinfo,
    case SHADER_OPCODE_A64_BYTE_SCATTERED_READ_LOGICAL:
       return devinfo->gen <= 8 ? 8 : MIN2(16, inst->exec_size);
 
+   case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL:
+   case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT_LOGICAL:
+      return 8;
+
    case SHADER_OPCODE_URB_READ_SIMD8:
    case SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT:
    case SHADER_OPCODE_URB_WRITE_SIMD8:
index 5361b7680039555ccc2454ec890108167e72fd4c..73aebbcfb22d38fbf1dd5d87b0f09d5823e49ea0 100644 (file)
@@ -233,6 +233,10 @@ public:
                                int op, nir_intrinsic_instr *instr);
    void nir_emit_shared_atomic_float(const brw::fs_builder &bld,
                                      int op, nir_intrinsic_instr *instr);
+   void nir_emit_global_atomic(const brw::fs_builder &bld,
+                               int op, nir_intrinsic_instr *instr);
+   void nir_emit_global_atomic_float(const brw::fs_builder &bld,
+                                     int op, nir_intrinsic_instr *instr);
    void nir_emit_texture(const brw::fs_builder &bld,
                          nir_tex_instr *instr);
    void nir_emit_jump(const brw::fs_builder &bld,
index 06d45d7700265b12290b6b7d98733a68840ae8bf..1041296b903eb94a6f43c41caf16317239d47a51 100644 (file)
@@ -4029,6 +4029,46 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
       }
       break;
 
+   case nir_intrinsic_global_atomic_add:
+      nir_emit_global_atomic(bld, get_op_for_atomic_add(instr, 1), instr);
+      break;
+   case nir_intrinsic_global_atomic_imin:
+      nir_emit_global_atomic(bld, BRW_AOP_IMIN, instr);
+      break;
+   case nir_intrinsic_global_atomic_umin:
+      nir_emit_global_atomic(bld, BRW_AOP_UMIN, instr);
+      break;
+   case nir_intrinsic_global_atomic_imax:
+      nir_emit_global_atomic(bld, BRW_AOP_IMAX, instr);
+      break;
+   case nir_intrinsic_global_atomic_umax:
+      nir_emit_global_atomic(bld, BRW_AOP_UMAX, instr);
+      break;
+   case nir_intrinsic_global_atomic_and:
+      nir_emit_global_atomic(bld, BRW_AOP_AND, instr);
+      break;
+   case nir_intrinsic_global_atomic_or:
+      nir_emit_global_atomic(bld, BRW_AOP_OR, instr);
+      break;
+   case nir_intrinsic_global_atomic_xor:
+      nir_emit_global_atomic(bld, BRW_AOP_XOR, instr);
+      break;
+   case nir_intrinsic_global_atomic_exchange:
+      nir_emit_global_atomic(bld, BRW_AOP_MOV, instr);
+      break;
+   case nir_intrinsic_global_atomic_comp_swap:
+      nir_emit_global_atomic(bld, BRW_AOP_CMPWR, instr);
+      break;
+   case nir_intrinsic_global_atomic_fmin:
+      nir_emit_global_atomic_float(bld, BRW_AOP_FMIN, instr);
+      break;
+   case nir_intrinsic_global_atomic_fmax:
+      nir_emit_global_atomic_float(bld, BRW_AOP_FMAX, instr);
+      break;
+   case nir_intrinsic_global_atomic_fcomp_swap:
+      nir_emit_global_atomic_float(bld, BRW_AOP_FCMPWR, instr);
+      break;
+
    case nir_intrinsic_load_ssbo: {
       assert(devinfo->gen >= 7);
 
@@ -4702,6 +4742,60 @@ fs_visitor::nir_emit_shared_atomic_float(const fs_builder &bld,
    bld.MOV(dest, atomic_result);
 }
 
+void
+fs_visitor::nir_emit_global_atomic(const fs_builder &bld,
+                                   int op, nir_intrinsic_instr *instr)
+{
+   if (stage == MESA_SHADER_FRAGMENT)
+      brw_wm_prog_data(prog_data)->has_side_effects = true;
+
+   fs_reg dest;
+   if (nir_intrinsic_infos[instr->intrinsic].has_dest)
+      dest = get_nir_dest(instr->dest);
+
+   fs_reg addr = get_nir_src(instr->src[0]);
+
+   fs_reg data;
+   if (op != BRW_AOP_INC && op != BRW_AOP_DEC && op != BRW_AOP_PREDEC)
+      data = get_nir_src(instr->src[1]);
+
+   if (op == BRW_AOP_CMPWR) {
+      fs_reg tmp = bld.vgrf(data.type, 2);
+      fs_reg sources[2] = { data, get_nir_src(instr->src[2]) };
+      bld.LOAD_PAYLOAD(tmp, sources, 2, 0);
+      data = tmp;
+   }
+
+   bld.emit(SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL,
+            dest, addr, data, brw_imm_ud(op));
+}
+
+void
+fs_visitor::nir_emit_global_atomic_float(const fs_builder &bld,
+                                         int op, nir_intrinsic_instr *instr)
+{
+   if (stage == MESA_SHADER_FRAGMENT)
+      brw_wm_prog_data(prog_data)->has_side_effects = true;
+
+   assert(nir_intrinsic_infos[instr->intrinsic].has_dest);
+   fs_reg dest = get_nir_dest(instr->dest);
+
+   fs_reg addr = get_nir_src(instr->src[0]);
+
+   assert(op != BRW_AOP_INC && op != BRW_AOP_DEC && op != BRW_AOP_PREDEC);
+   fs_reg data = get_nir_src(instr->src[1]);
+
+   if (op == BRW_AOP_FCMPWR) {
+      fs_reg tmp = bld.vgrf(data.type, 2);
+      fs_reg sources[2] = { data, get_nir_src(instr->src[2]) };
+      bld.LOAD_PAYLOAD(tmp, sources, 2, 0);
+      data = tmp;
+   }
+
+   bld.emit(SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL,
+            dest, addr, data, brw_imm_ud(op));
+}
+
 void
 fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr)
 {
index 692398da496239cf9ae0fb2c2467d5c7be500e04..861b2abfff22e3f610927ef60ce4168ae420af9f 100644 (file)
@@ -499,6 +499,8 @@ schedule_node::set_latency_gen7(bool is_haswell)
          case HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP_SIMD4X2:
          case HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP:
          case GEN9_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP:
+         case GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_OP:
+         case GEN9_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_FLOAT_OP:
             /* See also SHADER_OPCODE_UNTYPED_ATOMIC */
             latency = 14000;
             break;
index c2751557af80573bac1081340c868b58af9f329a..569e68e02afeb52bc37e3911bcf32085f6555b6e 100644 (file)
@@ -302,6 +302,10 @@ brw_instruction_name(const struct gen_device_info *devinfo, enum opcode op)
       return "a64_byte_scattered_read_logical";
    case SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL:
       return "a64_byte_scattered_write_logical";
+   case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL:
+      return "a64_untyped_atomic_logical";
+   case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT_LOGICAL:
+      return "a64_untyped_atomic_float_logical";
    case SHADER_OPCODE_TYPED_ATOMIC:
       return "typed_atomic";
    case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL:
@@ -1020,6 +1024,8 @@ backend_instruction::has_side_effects() const
    case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL:
    case SHADER_OPCODE_A64_UNTYPED_WRITE_LOGICAL:
    case SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL:
+   case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL:
+   case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT_LOGICAL:
    case SHADER_OPCODE_BYTE_SCATTERED_WRITE:
    case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL:
    case SHADER_OPCODE_TYPED_ATOMIC: