intel/fs: Use SHADER_OPCODE_SEND for surface messages
author Jason Ekstrand <jason.ekstrand@intel.com>
Tue, 30 Oct 2018 17:23:44 +0000 (12:23 -0500)
committer Jason Ekstrand <jason@jlekstrand.net>
Tue, 29 Jan 2019 18:43:55 +0000 (18:43 +0000)
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
src/intel/compiler/brw_eu.h
src/intel/compiler/brw_eu_emit.c
src/intel/compiler/brw_fs.cpp
src/intel/compiler/brw_fs_generator.cpp
src/intel/compiler/brw_schedule_instructions.cpp

index e59c893d543170be84fe1a8f629befb6f36a1949..c9a8ea948ede77065f5443888908b23d8ba72f3c 100644 (file)
@@ -790,17 +790,6 @@ brw_untyped_atomic(struct brw_codegen *p,
                    bool response_expected,
                    bool header_present);
 
-void
-brw_untyped_atomic_float(struct brw_codegen *p,
-                         struct brw_reg dst,
-                         struct brw_reg payload,
-                         struct brw_reg surface,
-                         unsigned atomic_op,
-                         unsigned msg_length,
-                         bool response_expected,
-                         bool header_present);
-
-
 void
 brw_untyped_surface_read(struct brw_codegen *p,
                          struct brw_reg dst,
@@ -844,22 +833,6 @@ brw_typed_surface_write(struct brw_codegen *p,
                         unsigned num_channels,
                         bool header_present);
 
-void
-brw_byte_scattered_read(struct brw_codegen *p,
-                        struct brw_reg dst,
-                        struct brw_reg payload,
-                        struct brw_reg surface,
-                        unsigned msg_length,
-                        unsigned bit_size);
-
-void
-brw_byte_scattered_write(struct brw_codegen *p,
-                         struct brw_reg payload,
-                         struct brw_reg surface,
-                         unsigned msg_length,
-                         unsigned bit_size,
-                         bool header_present);
-
 void
 brw_memory_fence(struct brw_codegen *p,
                  struct brw_reg dst,
index df63a6c42b77af077afa05b08a3c158a877df4be..4bd01a55cb70f71c8dc1b7bb54b318ab5d733646 100644 (file)
@@ -2785,35 +2785,6 @@ brw_untyped_atomic(struct brw_codegen *p,
                                      payload, surface, desc);
 }
 
-void
-brw_untyped_atomic_float(struct brw_codegen *p,
-                         struct brw_reg dst,
-                         struct brw_reg payload,
-                         struct brw_reg surface,
-                         unsigned atomic_op,
-                         unsigned msg_length,
-                         bool response_expected,
-                         bool header_present)
-{
-   const struct gen_device_info *devinfo = p->devinfo;
-
-   assert(devinfo->gen >= 9);
-   assert(brw_get_default_access_mode(p) == BRW_ALIGN_1);
-
-   const unsigned sfid = HSW_SFID_DATAPORT_DATA_CACHE_1;
-   const unsigned exec_size = 1 << brw_get_default_exec_size(p);
-   const unsigned response_length =
-      brw_surface_payload_size(p, response_expected, exec_size);
-   const unsigned desc =
-      brw_message_desc(devinfo, msg_length, response_length, header_present) |
-      brw_dp_untyped_atomic_float_desc(devinfo, exec_size, atomic_op,
-                                       response_expected);
-
-   brw_send_indirect_surface_message(p, sfid,
-                                     brw_writemask(dst, WRITEMASK_XYZW),
-                                     payload, surface, desc);
-}
-
 void
 brw_untyped_surface_read(struct brw_codegen *p,
                          struct brw_reg dst,
@@ -2864,49 +2835,6 @@ brw_untyped_surface_write(struct brw_codegen *p,
                                      payload, surface, desc);
 }
 
-void
-brw_byte_scattered_read(struct brw_codegen *p,
-                        struct brw_reg dst,
-                        struct brw_reg payload,
-                        struct brw_reg surface,
-                        unsigned msg_length,
-                        unsigned bit_size)
-{
-   const struct gen_device_info *devinfo = p->devinfo;
-   assert(devinfo->gen > 7 || devinfo->is_haswell);
-   assert(brw_get_default_access_mode(p) == BRW_ALIGN_1);
-   const unsigned exec_size = 1 << brw_get_default_exec_size(p);
-   const unsigned response_length = brw_surface_payload_size(p, 1, exec_size);
-   const unsigned desc =
-      brw_message_desc(devinfo, msg_length, response_length, false) |
-      brw_dp_byte_scattered_rw_desc(devinfo, exec_size, bit_size, false);
-
-   brw_send_indirect_surface_message(p, GEN7_SFID_DATAPORT_DATA_CACHE,
-                                     dst, payload, surface, desc);
-}
-
-void
-brw_byte_scattered_write(struct brw_codegen *p,
-                         struct brw_reg payload,
-                         struct brw_reg surface,
-                         unsigned msg_length,
-                         unsigned bit_size,
-                         bool header_present)
-{
-   const struct gen_device_info *devinfo = p->devinfo;
-   assert(devinfo->gen > 7 || devinfo->is_haswell);
-   assert(brw_get_default_access_mode(p) == BRW_ALIGN_1);
-   const unsigned exec_size = 1 << brw_get_default_exec_size(p);
-   const unsigned desc =
-      brw_message_desc(devinfo, msg_length, 0, header_present) |
-      brw_dp_byte_scattered_rw_desc(devinfo, exec_size, bit_size, true);
-
-   brw_send_indirect_surface_message(p, GEN7_SFID_DATAPORT_DATA_CACHE,
-                                     brw_writemask(brw_null_reg(),
-                                                   WRITEMASK_XYZW),
-                                     payload, surface, desc);
-}
-
 void
 brw_typed_atomic(struct brw_codegen *p,
                  struct brw_reg dst,
index b4e07be14bd4c08a081739dea95ef25886cf8fd4..f6525fe467c195be04ed183b65084c8b0fb15a69 100644 (file)
@@ -4851,8 +4851,7 @@ emit_surface_header(const fs_builder &bld, const fs_reg &sample_mask)
 }
 
 static void
-lower_surface_logical_send(const fs_builder &bld, fs_inst *inst, opcode op,
-                           const fs_reg &sample_mask)
+lower_surface_logical_send(const fs_builder &bld, fs_inst *inst)
 {
    const gen_device_info *devinfo = bld.shader->devinfo;
 
@@ -4862,10 +4861,17 @@ lower_surface_logical_send(const fs_builder &bld, fs_inst *inst, opcode op,
    const fs_reg &surface = inst->src[2];
    const UNUSED fs_reg &dims = inst->src[3];
    const fs_reg &arg = inst->src[4];
+   assert(arg.file == IMM);
 
    /* Calculate the total number of components of the payload. */
    const unsigned addr_sz = inst->components_read(0);
    const unsigned src_sz = inst->components_read(1);
+
+   const bool is_typed_access =
+      inst->opcode == SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL ||
+      inst->opcode == SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL ||
+      inst->opcode == SHADER_OPCODE_TYPED_ATOMIC_LOGICAL;
+
    /* From the BDW PRM Volume 7, page 147:
     *
     *  "For the Data Cache Data Port*, the header must be present for the
@@ -4876,10 +4882,7 @@ lower_surface_logical_send(const fs_builder &bld, fs_inst *inst, opcode op,
     * messages prior to Gen9, since we have to provide a header anyway.  On
     * Gen11+ the header has been removed so we can only use predication.
     */
-   const unsigned header_sz = devinfo->gen < 9 &&
-                              (op == SHADER_OPCODE_TYPED_SURFACE_READ ||
-                               op == SHADER_OPCODE_TYPED_SURFACE_WRITE ||
-                               op == SHADER_OPCODE_TYPED_ATOMIC) ? 1 : 0;
+   const unsigned header_sz = devinfo->gen < 9 && is_typed_access ? 1 : 0;
    const unsigned sz = header_sz + addr_sz + src_sz;
 
    /* Allocate space for the payload. */
@@ -4887,6 +4890,10 @@ lower_surface_logical_send(const fs_builder &bld, fs_inst *inst, opcode op,
    const fs_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, sz);
    unsigned n = 0;
 
+   const bool has_side_effects = inst->has_side_effects();
+   fs_reg sample_mask = has_side_effects ? bld.sample_mask_reg() :
+                                           fs_reg(brw_imm_d(0xffff));
+
    /* Construct the payload. */
    if (header_sz)
       components[n++] = emit_surface_header(bld, sample_mask);
@@ -4925,14 +4932,125 @@ lower_surface_logical_send(const fs_builder &bld, fs_inst *inst, opcode op,
       }
    }
 
+   uint32_t sfid;
+   switch (inst->opcode) {
+   case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL:
+   case SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL:
+      /* Byte scattered opcodes go through the normal data cache */
+      sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
+      break;
+
+   case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL:
+   case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL:
+   case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
+   case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL:
+      /* Untyped Surface messages go through the data cache but the SFID value
+       * changed on Haswell.
+       */
+      sfid = (devinfo->gen >= 8 || devinfo->is_haswell ?
+              HSW_SFID_DATAPORT_DATA_CACHE_1 :
+              GEN7_SFID_DATAPORT_DATA_CACHE);
+      break;
+
+   case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL:
+   case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL:
+   case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL:
+      /* Typed surface messages go through the render cache on IVB and the
+       * data cache on HSW+.
+       */
+      sfid = (devinfo->gen >= 8 || devinfo->is_haswell ?
+              HSW_SFID_DATAPORT_DATA_CACHE_1 :
+              GEN6_SFID_DATAPORT_RENDER_CACHE);
+      break;
+
+   default:
+      unreachable("Unsupported surface opcode");
+   }
+
+   uint32_t desc;
+   switch (inst->opcode) {
+   case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL:
+      desc = brw_dp_untyped_surface_rw_desc(devinfo, inst->exec_size,
+                                            arg.ud, /* num_channels */
+                                            false   /* write */);
+      break;
+
+   case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL:
+      desc = brw_dp_untyped_surface_rw_desc(devinfo, inst->exec_size,
+                                            arg.ud, /* num_channels */
+                                            true    /* write */);
+      break;
+
+   case SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL:
+      desc = brw_dp_byte_scattered_rw_desc(devinfo, inst->exec_size,
+                                           arg.ud, /* bit_size */
+                                           false   /* write */);
+      break;
+
+   case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL:
+      desc = brw_dp_byte_scattered_rw_desc(devinfo, inst->exec_size,
+                                           arg.ud, /* bit_size */
+                                           true    /* write */);
+      break;
+
+   case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
+      desc = brw_dp_untyped_atomic_desc(devinfo, inst->exec_size,
+                                        arg.ud, /* atomic_op */
+                                        !inst->dst.is_null());
+      break;
+
+   case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL:
+      desc = brw_dp_untyped_atomic_float_desc(devinfo, inst->exec_size,
+                                              arg.ud, /* atomic_op */
+                                              !inst->dst.is_null());
+      break;
+
+   case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL:
+      desc = brw_dp_typed_surface_rw_desc(devinfo, inst->exec_size, inst->group,
+                                          arg.ud, /* num_channels */
+                                          false   /* write */);
+      break;
+
+   case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL:
+      desc = brw_dp_typed_surface_rw_desc(devinfo, inst->exec_size, inst->group,
+                                          arg.ud, /* num_channels */
+                                          true    /* write */);
+      break;
+
+   case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL:
+      desc = brw_dp_typed_atomic_desc(devinfo, inst->exec_size, inst->group,
+                                      arg.ud, /* atomic_op */
+                                      !inst->dst.is_null());
+      break;
+
+   default:
+      unreachable("Unknown surface logical instruction");
+   }
+
    /* Update the original instruction. */
-   inst->opcode = op;
+   inst->opcode = SHADER_OPCODE_SEND;
    inst->mlen = header_sz + (addr_sz + src_sz) * inst->exec_size / 8;
    inst->header_size = header_sz;
+   inst->send_has_side_effects = has_side_effects;
+   inst->send_is_volatile = !has_side_effects;
+
+   /* Set up SFID and descriptors */
+   inst->sfid = sfid;
+   inst->desc = desc;
+   if (surface.file == IMM) {
+      inst->desc |= surface.ud & 0xff;
+      inst->src[0] = brw_imm_ud(0);
+   } else {
+      const fs_builder ubld = bld.exec_all().group(1, 0);
+      fs_reg tmp = ubld.vgrf(BRW_REGISTER_TYPE_UD);
+      ubld.AND(tmp, surface, brw_imm_ud(0xff));
+      inst->src[0] = component(tmp, 0);
+   }
+   inst->src[1] = brw_imm_ud(0); /* ex_desc */
+
+   /* Finally, the payload */
+   inst->src[2] = payload;
 
-   inst->src[0] = payload;
-   inst->src[1] = surface;
-   inst->src[2] = arg;
    inst->resize_sources(3);
 
    delete[] components;
@@ -5076,57 +5194,15 @@ fs_visitor::lower_logical_sends()
          break;
 
       case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL:
-         lower_surface_logical_send(ibld, inst,
-                                    SHADER_OPCODE_UNTYPED_SURFACE_READ,
-                                    fs_reg());
-         break;
-
       case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL:
-         lower_surface_logical_send(ibld, inst,
-                                    SHADER_OPCODE_UNTYPED_SURFACE_WRITE,
-                                    ibld.sample_mask_reg());
-         break;
-
       case SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL:
-         lower_surface_logical_send(ibld, inst,
-                                    SHADER_OPCODE_BYTE_SCATTERED_READ,
-                                    fs_reg());
-         break;
-
       case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL:
-         lower_surface_logical_send(ibld, inst,
-                                    SHADER_OPCODE_BYTE_SCATTERED_WRITE,
-                                    ibld.sample_mask_reg());
-         break;
-
       case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
-         lower_surface_logical_send(ibld, inst,
-                                    SHADER_OPCODE_UNTYPED_ATOMIC,
-                                    ibld.sample_mask_reg());
-         break;
-
       case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL:
-         lower_surface_logical_send(ibld, inst,
-                                    SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT,
-                                    ibld.sample_mask_reg());
-         break;
-
       case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL:
-         lower_surface_logical_send(ibld, inst,
-                                    SHADER_OPCODE_TYPED_SURFACE_READ,
-                                    brw_imm_d(0xffff));
-         break;
-
       case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL:
-         lower_surface_logical_send(ibld, inst,
-                                    SHADER_OPCODE_TYPED_SURFACE_WRITE,
-                                    ibld.sample_mask_reg());
-         break;
-
       case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL:
-         lower_surface_logical_send(ibld, inst,
-                                    SHADER_OPCODE_TYPED_ATOMIC,
-                                    ibld.sample_mask_reg());
+         lower_surface_logical_send(ibld, inst);
          break;
 
       case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL:
index 17578fe5ff6b82bc09a9e2c6a7befe4cb1eb5217..0c9feb63a8c476ccf5beda98091761ceaafbc53a 100644 (file)
@@ -2264,68 +2264,6 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
          generate_shader_time_add(inst, src[0], src[1], src[2]);
          break;
 
-      case SHADER_OPCODE_UNTYPED_ATOMIC:
-         assert(src[2].file == BRW_IMMEDIATE_VALUE);
-         brw_untyped_atomic(p, dst, src[0], src[1], src[2].ud,
-                            inst->mlen, !inst->dst.is_null(),
-                            inst->header_size);
-         break;
-
-      case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT:
-         assert(src[2].file == BRW_IMMEDIATE_VALUE);
-         brw_untyped_atomic_float(p, dst, src[0], src[1], src[2].ud,
-                                  inst->mlen, !inst->dst.is_null(),
-                                  inst->header_size);
-         break;
-
-      case SHADER_OPCODE_UNTYPED_SURFACE_READ:
-         assert(!inst->header_size);
-         assert(src[2].file == BRW_IMMEDIATE_VALUE);
-         brw_untyped_surface_read(p, dst, src[0], src[1],
-                                  inst->mlen, src[2].ud);
-         break;
-
-      case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
-         assert(src[2].file == BRW_IMMEDIATE_VALUE);
-         brw_untyped_surface_write(p, src[0], src[1],
-                                   inst->mlen, src[2].ud,
-                                   inst->header_size);
-         break;
-
-      case SHADER_OPCODE_BYTE_SCATTERED_READ:
-         assert(!inst->header_size);
-         assert(src[2].file == BRW_IMMEDIATE_VALUE);
-         brw_byte_scattered_read(p, dst, src[0], src[1],
-                                 inst->mlen, src[2].ud);
-         break;
-
-      case SHADER_OPCODE_BYTE_SCATTERED_WRITE:
-         assert(src[2].file == BRW_IMMEDIATE_VALUE);
-         brw_byte_scattered_write(p, src[0], src[1],
-                                  inst->mlen, src[2].ud,
-                                  inst->header_size);
-         break;
-
-      case SHADER_OPCODE_TYPED_ATOMIC:
-         assert(src[2].file == BRW_IMMEDIATE_VALUE);
-         brw_typed_atomic(p, dst, src[0], src[1],
-                          src[2].ud, inst->mlen, !inst->dst.is_null(),
-                          inst->header_size);
-         break;
-
-      case SHADER_OPCODE_TYPED_SURFACE_READ:
-         assert(src[2].file == BRW_IMMEDIATE_VALUE);
-         brw_typed_surface_read(p, dst, src[0], src[1],
-                                inst->mlen, src[2].ud,
-                                inst->header_size);
-         break;
-
-      case SHADER_OPCODE_TYPED_SURFACE_WRITE:
-         assert(src[2].file == BRW_IMMEDIATE_VALUE);
-         brw_typed_surface_write(p, src[0], src[1], inst->mlen, src[2].ud,
-                                 inst->header_size);
-         break;
-
       case SHADER_OPCODE_MEMORY_FENCE:
          brw_memory_fence(p, dst, BRW_OPCODE_SEND);
          break;
index bc4c2dc5cdbda9b9f7fad6c5f8b2019d00d1e618..f453bb42574c43e7d231b94ff859040b4edb6412 100644 (file)
@@ -416,6 +416,78 @@ schedule_node::set_latency_gen7(bool is_haswell)
 
    case SHADER_OPCODE_SEND:
       switch (inst->sfid) {
+      case GEN6_SFID_DATAPORT_RENDER_CACHE:
+         switch ((inst->desc >> 14) & 0x1f) {
+         case GEN7_DATAPORT_RC_TYPED_SURFACE_WRITE:
+         case GEN7_DATAPORT_RC_TYPED_SURFACE_READ:
+            /* See also SHADER_OPCODE_TYPED_SURFACE_READ */
+            assert(!is_haswell);
+            latency = 600;
+            break;
+
+         case GEN7_DATAPORT_RC_TYPED_ATOMIC_OP:
+            /* See also SHADER_OPCODE_TYPED_ATOMIC */
+            assert(!is_haswell);
+            latency = 14000;
+            break;
+
+         default:
+            unreachable("Unknown render cache message");
+         }
+         break;
+
+      case GEN7_SFID_DATAPORT_DATA_CACHE:
+         switch ((inst->desc >> 14) & 0x1f) {
+         case HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_READ:
+         case HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_WRITE:
+            /* We have no data for this but assume it's roughly the same as
+             * untyped surface read/write.
+             */
+            latency = 300;
+            break;
+
+         case GEN7_DATAPORT_DC_UNTYPED_SURFACE_READ:
+         case GEN7_DATAPORT_DC_UNTYPED_SURFACE_WRITE:
+            /* See also SHADER_OPCODE_UNTYPED_SURFACE_READ */
+            assert(!is_haswell);
+            latency = 600;
+            break;
+
+         case GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP:
+            /* See also SHADER_OPCODE_UNTYPED_ATOMIC */
+            assert(!is_haswell);
+            latency = 14000;
+            break;
+
+         default:
+            unreachable("Unknown data cache message");
+         }
+         break;
+
+      case HSW_SFID_DATAPORT_DATA_CACHE_1:
+         switch ((inst->desc >> 14) & 0x1f) {
+         case HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ:
+         case HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_WRITE:
+         case HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_READ:
+         case HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_WRITE:
+            /* See also SHADER_OPCODE_UNTYPED_SURFACE_READ */
+            latency = 300;
+            break;
+
+         case HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP:
+         case HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2:
+         case HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP_SIMD4X2:
+         case HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP:
+         case GEN9_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP:
+            /* See also SHADER_OPCODE_UNTYPED_ATOMIC */
+            latency = 14000;
+            break;
+
+         default:
+            unreachable("Unknown data cache message");
+         }
+         break;
+
       default:
          unreachable("Unknown SFID");
       }