intel/eu: Add an EOT parameter to send_indirect_[split]_message
author Jason Ekstrand <jason.ekstrand@intel.com>
Thu, 7 Feb 2019 23:45:51 +0000 (17:45 -0600)
committer Jason Ekstrand <jason.ekstrand@intel.com>
Mon, 25 Feb 2019 17:35:12 +0000 (11:35 -0600)
For split indirect sends we have to put the EOT parameter in the
extended descriptor as well as the instruction itself, so just calling
brw_inst_set_eot is insufficient.  Moving the EOT handling into
the send_indirect_[split]_message helpers lets us handle it properly.

src/intel/compiler/brw_eu.h
src/intel/compiler/brw_eu_emit.c
src/intel/compiler/brw_fs_generator.cpp
src/intel/compiler/brw_vec4_generator.cpp

index 104cbece9b35da12261ae1bf3ba2e0b63381ce0a..ac8ff69a7e00bdf34b3ad6ce63907657126697a1 100644 (file)
@@ -911,7 +911,8 @@ brw_send_indirect_message(struct brw_codegen *p,
                           struct brw_reg dst,
                           struct brw_reg payload,
                           struct brw_reg desc,
-                          unsigned desc_imm);
+                          unsigned desc_imm,
+                          bool eot);
 
 void
 brw_send_indirect_split_message(struct brw_codegen *p,
@@ -922,7 +923,8 @@ brw_send_indirect_split_message(struct brw_codegen *p,
                                 struct brw_reg desc,
                                 unsigned desc_imm,
                                 struct brw_reg ex_desc,
-                                unsigned ex_desc_imm);
+                                unsigned ex_desc_imm,
+                                bool eot);
 
 void brw_ff_sync(struct brw_codegen *p,
                   struct brw_reg dest,
index 9be82d1b87cb3812e5ed03cb3dc54e349e2722ff..4440c84760b7253fe221e32f73d150a5d7be71a1 100644 (file)
@@ -2481,7 +2481,8 @@ brw_send_indirect_message(struct brw_codegen *p,
                           struct brw_reg dst,
                           struct brw_reg payload,
                           struct brw_reg desc,
-                          unsigned desc_imm)
+                          unsigned desc_imm,
+                          bool eot)
 {
    const struct gen_device_info *devinfo = p->devinfo;
    struct brw_inst *send;
@@ -2518,6 +2519,7 @@ brw_send_indirect_message(struct brw_codegen *p,
    brw_set_dest(p, send, dst);
    brw_set_src0(p, send, retype(payload, BRW_REGISTER_TYPE_UD));
    brw_inst_set_sfid(devinfo, send, sfid);
+   brw_inst_set_eot(devinfo, send, eot);
 }
 
 void
@@ -2529,7 +2531,8 @@ brw_send_indirect_split_message(struct brw_codegen *p,
                                 struct brw_reg desc,
                                 unsigned desc_imm,
                                 struct brw_reg ex_desc,
-                                unsigned ex_desc_imm)
+                                unsigned ex_desc_imm,
+                                bool eot)
 {
    const struct gen_device_info *devinfo = p->devinfo;
    struct brw_inst *send;
@@ -2574,13 +2577,13 @@ brw_send_indirect_split_message(struct brw_codegen *p,
        * so the caller can specify additional descriptor bits with the
        * desc_imm immediate.
        *
-       * Even though the instruction dispatcher always pulls the SFID from the
-       * instruction itself, the extended descriptor sent to the actual unit
-       * gets the SFID from the extended descriptor which comes from the
-       * address register.  If we don't OR it in, the external unit gets
-       * confused and hangs the GPU.
+       * Even though the instruction dispatcher always pulls the SFID and EOT
+       * fields from the instruction itself, the actual external unit which
+       * processes the message gets the SFID and EOT from the extended
+       * descriptor which comes from the address register.  If we don't OR
+       * those two bits in, the external unit may get confused and hang.
        */
-      brw_OR(p, addr, ex_desc, brw_imm_ud(ex_desc_imm | sfid));
+      brw_OR(p, addr, ex_desc, brw_imm_ud(ex_desc_imm | sfid | eot << 5));
 
       brw_pop_insn_state(p);
       ex_desc = addr;
@@ -2613,6 +2616,7 @@ brw_send_indirect_split_message(struct brw_codegen *p,
    }
 
    brw_inst_set_sfid(devinfo, send, sfid);
+   brw_inst_set_eot(devinfo, send, eot);
 }
 
 static void
@@ -2645,7 +2649,7 @@ brw_send_indirect_surface_message(struct brw_codegen *p,
       surface = addr;
    }
 
-   brw_send_indirect_message(p, sfid, dst, payload, surface, desc_imm);
+   brw_send_indirect_message(p, sfid, dst, payload, surface, desc_imm, false);
 }
 
 static bool
@@ -3164,7 +3168,8 @@ brw_pixel_interpolator_query(struct brw_codegen *p,
                              dest,
                              mrf,
                              vec1(data),
-                             desc);
+                             desc,
+                             false);
 }
 
 void
index 1b9ef490502eb3ecba40a101335e190287609b36..649dd999a0e4bb7a0fe2dd74b9ee22ff40e27dd3 100644 (file)
@@ -279,16 +279,16 @@ fs_generator::generate_send(fs_inst *inst,
        * also covers the dual-payload case because ex_mlen goes in ex_desc.
        */
       brw_send_indirect_split_message(p, inst->sfid, dst, payload, payload2,
-                                      desc, desc_imm, ex_desc, ex_desc_imm);
+                                      desc, desc_imm, ex_desc, ex_desc_imm,
+                                      inst->eot);
       if (inst->check_tdr)
          brw_inst_set_opcode(p->devinfo, brw_last_inst, BRW_OPCODE_SENDSC);
    } else {
-      brw_send_indirect_message(p, inst->sfid, dst, payload, desc, desc_imm);
+      brw_send_indirect_message(p, inst->sfid, dst, payload, desc, desc_imm,
+                                   inst->eot);
       if (inst->check_tdr)
          brw_inst_set_opcode(p->devinfo, brw_last_inst, BRW_OPCODE_SENDC);
    }
-
-   brw_inst_set_eot(p->devinfo, brw_last_inst, inst->eot);
 }
 
 void
@@ -1463,7 +1463,8 @@ fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst,
          brw_dp_read_desc(devinfo, 0 /* surface */,
                           BRW_DATAPORT_OWORD_BLOCK_DWORDS(inst->exec_size),
                           GEN7_DATAPORT_DC_OWORD_BLOCK_READ,
-                          BRW_DATAPORT_READ_TARGET_DATA_CACHE));
+                          BRW_DATAPORT_READ_TARGET_DATA_CACHE),
+         false /* EOT */);
 
       brw_pop_insn_state(p);
    }
index 93baaef3ab771399523f10601756424fc271f339..e473d3e242546cc6043322e27afa71c7b6aefd95 100644 (file)
@@ -330,7 +330,8 @@ generate_tex(struct brw_codegen *p,
                           0 /* sampler */,
                           msg_type,
                           BRW_SAMPLER_SIMD_MODE_SIMD4X2,
-                          return_format));
+                          return_format),
+         false /* EOT */);
 
       /* visitor knows more than we do about the surface limit required,
        * so has already done marking.
@@ -1400,7 +1401,8 @@ generate_pull_constant_load_gen7(struct brw_codegen *p,
                           0 /* sampler */,
                           GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
                           BRW_SAMPLER_SIMD_MODE_SIMD4X2,
-                          0));
+                          0),
+         false /* EOT */);
    }
 }