intel/fs: Add a generic SEND opcode
author Jason Ekstrand <jason.ekstrand@intel.com>
Mon, 29 Oct 2018 20:06:14 +0000 (15:06 -0500)
committer Jason Ekstrand <jason@jlekstrand.net>
Tue, 29 Jan 2019 18:43:55 +0000 (18:43 +0000)
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
src/intel/compiler/brw_eu_defines.h
src/intel/compiler/brw_fs.cpp
src/intel/compiler/brw_fs.h
src/intel/compiler/brw_fs_cse.cpp
src/intel/compiler/brw_fs_generator.cpp
src/intel/compiler/brw_fs_reg_allocate.cpp
src/intel/compiler/brw_schedule_instructions.cpp
src/intel/compiler/brw_shader.cpp
src/intel/compiler/brw_shader.h

index 4640c98140fdb0be0bb5a7a75109238c0a53b137..e52f1b505e97ebba1103e1bde5a75ea1bdefeec2 100644 (file)
@@ -315,6 +315,13 @@ enum opcode {
    SHADER_OPCODE_SIN,
    SHADER_OPCODE_COS,
 
+   /**
+    * A generic "send" opcode.  The first two sources are the message
+    * descriptor and extended message descriptor respectively.  The third
+    * and optional fourth sources are the message payload.
+    */
+   SHADER_OPCODE_SEND,
+
    /**
     * Texture sampling opcodes.
     *
index b3e3980f0d0e71f1176f9d7d846395525b6f5f5f..b4e07be14bd4c08a081739dea95ef25886cf8fd4 100644 (file)
@@ -216,6 +216,7 @@ bool
 fs_inst::is_send_from_grf() const
 {
    switch (opcode) {
+   case SHADER_OPCODE_SEND:
    case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7:
    case SHADER_OPCODE_SHADER_TIME_ADD:
    case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
@@ -848,6 +849,14 @@ unsigned
 fs_inst::size_read(int arg) const
 {
    switch (opcode) {
+   case SHADER_OPCODE_SEND:
+      if (arg == 2) {
+         return mlen * REG_SIZE;
+      } else if (arg == 3) {
+         return ex_mlen * REG_SIZE;
+      }
+      break;
+
    case FS_OPCODE_FB_WRITE:
    case FS_OPCODE_REP_FB_WRITE:
       if (arg == 0) {
@@ -6025,6 +6034,10 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
       fprintf(file, "(mlen: %d) ", inst->mlen);
    }
 
+   if (inst->ex_mlen) {
+      fprintf(file, "(ex_mlen: %d) ", inst->ex_mlen);
+   }
+
    if (inst->eot) {
       fprintf(file, "(EOT) ");
    }
index 68287bcdcea100c907a0e84cd5869f0f7ea324a9..093a5751e2daf6d22c9fb446039f4c46eb1e1d45 100644 (file)
@@ -406,6 +406,12 @@ private:
                       struct brw_reg payload,
                       struct brw_reg implied_header,
                       GLuint nr);
+   void generate_send(fs_inst *inst,
+                      struct brw_reg dst,
+                      struct brw_reg desc,
+                      struct brw_reg ex_desc,
+                      struct brw_reg payload,
+                      struct brw_reg payload2);
    void generate_fb_write(fs_inst *inst, struct brw_reg payload);
    void generate_fb_read(fs_inst *inst, struct brw_reg dst,
                          struct brw_reg payload);
index 6859733d58c45931315815dea4a8fe31ab36d165..19089246c4f5b40c22ce76ca1ad7dc06ad26a349 100644 (file)
@@ -184,8 +184,13 @@ instructions_match(fs_inst *a, fs_inst *b, bool *negate)
           a->dst.type == b->dst.type &&
           a->offset == b->offset &&
           a->mlen == b->mlen &&
+          a->ex_mlen == b->ex_mlen &&
+          a->sfid == b->sfid &&
+          a->desc == b->desc &&
           a->size_written == b->size_written &&
           a->base_mrf == b->base_mrf &&
+          a->check_tdr == b->check_tdr &&
+          a->send_has_side_effects == b->send_has_side_effects &&
           a->eot == b->eot &&
           a->header_size == b->header_size &&
           a->shadow_compare == b->shadow_compare &&
index 5fc6cf5f8cc88367ef5a11b6bcd76a722e4c092f..17578fe5ff6b82bc09a9e2c6a7befe4cb1eb5217 100644 (file)
@@ -250,6 +250,33 @@ fs_generator::patch_discard_jumps_to_fb_writes()
    return true;
 }
 
+void
+fs_generator::generate_send(fs_inst *inst,
+                            struct brw_reg dst,
+                            struct brw_reg desc,
+                            struct brw_reg ex_desc,
+                            struct brw_reg payload,
+                            struct brw_reg payload2)
+{
+   /* SENDS not yet supported: ex_desc must be immediate 0, payload2 null */
+   assert(ex_desc.file == BRW_IMMEDIATE_VALUE && ex_desc.d == 0);
+   assert(payload2.file == BRW_ARCHITECTURE_REGISTER_FILE &&
+          payload2.nr == BRW_ARF_NULL);
+   /* rlen is 0 for a null dst, else size_written in REG_SIZE units. */
+   const bool dst_is_null = dst.file == BRW_ARCHITECTURE_REGISTER_FILE &&
+                            dst.nr == BRW_ARF_NULL;
+   const unsigned rlen = dst_is_null ? 0 : inst->size_written / REG_SIZE;
+
+   uint32_t desc_imm = inst->desc |
+      brw_message_desc(devinfo, inst->mlen, rlen, inst->header_size);
+
+   brw_send_indirect_message(p, inst->sfid, dst, payload, desc, desc_imm);
+   /* Set EOT on brw_last_inst; check_tdr also rewrites it to SENDC. */
+   brw_inst_set_eot(p->devinfo, brw_last_inst, inst->eot);
+   if (inst->check_tdr)
+      brw_inst_set_opcode(p->devinfo, brw_last_inst, BRW_OPCODE_SENDC);
+}
+
 void
 fs_generator::fire_fb_write(fs_inst *inst,
                             struct brw_reg payload,
@@ -1807,7 +1834,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
    struct disasm_info *disasm_info = disasm_initialize(devinfo, cfg);
 
    foreach_block_and_inst (block, fs_inst, inst, cfg) {
-      struct brw_reg src[3], dst;
+      struct brw_reg src[4], dst;
       unsigned int last_insn_offset = p->next_insn_offset;
       bool multiple_instructions_emitted = false;
 
@@ -2130,6 +2157,12 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
          src[0].subnr = 4 * type_sz(src[0].type);
          brw_MOV(p, dst, stride(src[0], 8, 4, 1));
          break;
+
+      case SHADER_OPCODE_SEND:
+         generate_send(inst, dst, src[0], src[1], src[2],
+                       inst->ex_mlen > 0 ? src[3] : brw_null_reg());
+         break;
+
       case SHADER_OPCODE_GET_BUFFER_SIZE:
          generate_get_buffer_size(inst, dst, src[0], src[1]);
          break;
index 678afe6bab42335ce77e1369adc323dcc151054c..5db5242452ea9d23480043723a6be3c878d3b0e4 100644 (file)
@@ -617,7 +617,9 @@ fs_visitor::assign_regs(bool allow_spilling, bool spill_all)
           * highest register that works.
           */
          if (inst->eot) {
-            int size = alloc.sizes[inst->src[0].nr];
+            const int vgrf = inst->opcode == SHADER_OPCODE_SEND ?
+                             inst->src[2].nr : inst->src[0].nr;
+            int size = alloc.sizes[vgrf];
             int reg = compiler->fs_reg_sets[rsi].class_to_ra_reg_range[size] - 1;
 
             /* If something happened to spill, we want to push the EOT send
@@ -626,7 +628,7 @@ fs_visitor::assign_regs(bool allow_spilling, bool spill_all)
              */
             reg -= BRW_MAX_MRF(devinfo->gen) - first_used_mrf;
 
-            ra_set_node_reg(g, inst->src[0].nr, reg);
+            ra_set_node_reg(g, vgrf, reg);
             break;
          }
       }
index 9b279df5cf14c7c714776fe052f96e24f6d5d0cf..bc4c2dc5cdbda9b9f7fad6c5f8b2019d00d1e618 100644 (file)
@@ -414,6 +414,13 @@ schedule_node::set_latency_gen7(bool is_haswell)
       latency = is_haswell ? 300 : 600;
       break;
 
+   case SHADER_OPCODE_SEND:
+      switch (inst->sfid) {
+      default:
+         unreachable("Unknown SFID");
+      }
+      break;
+
    default:
       /* 2 cycles:
        * mul(8) g4<1>F g2<0,1,0>F      0.5F            { align1 WE_normal 1Q };
index 97966c951a186fa1f0ca95aaf3b839936b51be06..f1037fcda22b5e9f335a9160abe34b537ce14bcd 100644 (file)
@@ -206,6 +206,9 @@ brw_instruction_name(const struct gen_device_info *devinfo, enum opcode op)
    case SHADER_OPCODE_COS:
       return "cos";
 
+   case SHADER_OPCODE_SEND:
+      return "send";
+
    case SHADER_OPCODE_TEX:
       return "tex";
    case SHADER_OPCODE_TEX_LOGICAL:
@@ -997,6 +1000,9 @@ bool
 backend_instruction::has_side_effects() const
 {
    switch (opcode) {
+   case SHADER_OPCODE_SEND:
+      return send_has_side_effects;
+
    case SHADER_OPCODE_UNTYPED_ATOMIC:
    case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
    case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT:
@@ -1033,6 +1039,9 @@ bool
 backend_instruction::is_volatile() const
 {
    switch (opcode) {
+   case SHADER_OPCODE_SEND:
+      return send_is_volatile;
+
    case SHADER_OPCODE_UNTYPED_SURFACE_READ:
    case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL:
    case SHADER_OPCODE_TYPED_SURFACE_READ:
index e1d598b87819c94921e3cc65a865529064d5c70d..45ff19832b7f4990d0b713a8986eaebe726a02d4 100644 (file)
@@ -156,8 +156,11 @@ struct backend_instruction {
 
    uint32_t offset; /**< spill/unspill offset or texture offset bitfield */
    uint8_t mlen; /**< SEND message length */
+   uint8_t ex_mlen; /**< SENDS extended message length */
    int8_t base_mrf; /**< First MRF in the SEND message, if mlen is nonzero. */
    uint8_t target; /**< MRT target. */
+   uint8_t sfid; /**< SFID for SEND instructions */
+   uint32_t desc; /**< SEND[S] message descriptor immediate */
    unsigned size_written; /**< Data written to the destination register in bytes. */
 
    enum opcode opcode; /* BRW_OPCODE_* or FS_OPCODE_* */
@@ -170,6 +173,9 @@ struct backend_instruction {
    bool no_dd_check:1;
    bool saturate:1;
    bool shadow_compare:1;
+   bool check_tdr:1; /**< Only valid for SEND; turns it into a SENDC */
+   bool send_has_side_effects:1; /**< Only valid for SHADER_OPCODE_SEND */
+   bool send_is_volatile:1; /**< Only valid for SHADER_OPCODE_SEND */
    bool eot:1;
 
    /* Chooses which flag subregister (f0.0 to f1.1) is used for conditional