intel/fs: Support SENDS in SHADER_OPCODE_SEND
author Jason Ekstrand <jason.ekstrand@intel.com>
Fri, 16 Nov 2018 03:05:08 +0000 (21:05 -0600)
committer Jason Ekstrand <jason@jlekstrand.net>
Tue, 29 Jan 2019 18:43:55 +0000 (18:43 +0000)
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
src/intel/compiler/brw_fs.cpp
src/intel/compiler/brw_fs.h
src/intel/compiler/brw_fs_generator.cpp

index 9ecabd58763b0c52b15b2c4599f69a8b16f4a019..d82572a5b70a189723406a8c94046495d34225ae 100644 (file)
@@ -6759,11 +6759,61 @@ fs_visitor::optimize()
       OPT(lower_simd_width);
    }
 
+   OPT(fixup_sends_duplicate_payload);
+
    lower_uniform_pull_constant_loads();
 
    validate();
 }
 
+/**
+ * From the Skylake PRM Vol. 2a docs for sends:
+ *
+ *    "It is required that the second block of GRFs does not overlap with the
+ *    first block."
+ *
+ * There are plenty of cases where we may accidentally violate this due to
+ * having, for instance, both sources be the constant 0.  This little pass
+ * just adds a new vgrf for the second payload and copies it over.
+ */
+bool
+fs_visitor::fixup_sends_duplicate_payload()
+{
+   bool progress = false; /* becomes true once any SEND has been rewritten */
+
+   foreach_block_and_inst_safe (block, fs_inst, inst, cfg) {
+      if (inst->opcode == SHADER_OPCODE_SEND && inst->ex_mlen > 0 &&
+          regions_overlap(inst->src[2], inst->mlen * REG_SIZE,
+                          inst->src[3], inst->ex_mlen * REG_SIZE)) {
+         fs_reg tmp = fs_reg(VGRF, alloc.allocate(inst->ex_mlen),
+                             BRW_REGISTER_TYPE_UD);
+         /* Sadly, we've lost all notion of channels and bit sizes at this
+          * point.  Just WE_all it: copy src[3] as raw UD data with exec_all.
+          */
+         const fs_builder ibld = bld.at(block, inst).exec_all().group(16, 0);
+         fs_reg copy_src = retype(inst->src[3], BRW_REGISTER_TYPE_UD);
+         fs_reg copy_dst = tmp;
+         for (unsigned i = 0; i < inst->ex_mlen; i += 2) { /* i counts regs; a 16-wide MOV presumably covers 2 -- TODO confirm */
+            if (inst->ex_mlen == i + 1) {
+               /* Odd ex_mlen: only one register left, so do SIMD8 */
+               ibld.group(8, 0).MOV(copy_dst, copy_src);
+            } else {
+               ibld.MOV(copy_dst, copy_src);
+            }
+            copy_src = offset(copy_src, ibld, 1); /* advance by one 16-wide step */
+            copy_dst = offset(copy_dst, ibld, 1);
+         }
+         inst->src[3] = tmp; /* the second payload now reads the fresh copy */
+         progress = true;
+      }
+   }
+
+   if (progress)
+      invalidate_live_intervals(); /* instructions were added above */
+
+   return progress;
+}
+
 /**
  * Three source instruction must have a GRF/MRF destination register.
  * ARF NULL is not allowed.  Fix that up by allocating a temporary GRF.
index 6467b4c6d95de3761521471c129ddfbf60b5378d..5361b7680039555ccc2454ec890108167e72fd4c 100644 (file)
@@ -103,6 +103,7 @@ public:
    void setup_vs_payload();
    void setup_gs_payload();
    void setup_cs_payload();
+   bool fixup_sends_duplicate_payload();
    void fixup_3src_null_dest();
    void assign_curb_setup();
    void calculate_urb_setup();
index 544d19826f422780421c4302943386959ba39d72..e3b68fa31658c2e2d086ca82999bcd627d393b8c 100644 (file)
@@ -258,11 +258,6 @@ fs_generator::generate_send(fs_inst *inst,
                             struct brw_reg payload,
                             struct brw_reg payload2)
 {
-   /* SENDS not yet supported */
-   assert(ex_desc.file == BRW_IMMEDIATE_VALUE && ex_desc.d == 0);
-   assert(payload2.file == BRW_ARCHITECTURE_REGISTER_FILE &&
-          payload2.nr == BRW_ARF_NULL);
-
    const bool dst_is_null = dst.file == BRW_ARCHITECTURE_REGISTER_FILE &&
                             dst.nr == BRW_ARF_NULL;
    const unsigned rlen = dst_is_null ? 0 : inst->size_written / REG_SIZE;
@@ -270,11 +265,23 @@ fs_generator::generate_send(fs_inst *inst,
    uint32_t desc_imm = inst->desc |
       brw_message_desc(devinfo, inst->mlen, rlen, inst->header_size);
 
-   brw_send_indirect_message(p, inst->sfid, dst, payload, desc, desc_imm);
+   uint32_t ex_desc_imm = brw_message_ex_desc(devinfo, inst->ex_mlen);
+
+   if (ex_desc.file != BRW_IMMEDIATE_VALUE || ex_desc.ud || ex_desc_imm) {
+      /* If we have any sort of extended descriptor, then we need SENDS.  This
+       * also covers the dual-payload case because ex_mlen goes in ex_desc.
+       */
+      brw_send_indirect_split_message(p, inst->sfid, dst, payload, payload2,
+                                      desc, desc_imm, ex_desc, ex_desc_imm);
+      if (inst->check_tdr)
+         brw_inst_set_opcode(p->devinfo, brw_last_inst, BRW_OPCODE_SENDSC);
+   } else {
+      brw_send_indirect_message(p, inst->sfid, dst, payload, desc, desc_imm);
+      if (inst->check_tdr)
+         brw_inst_set_opcode(p->devinfo, brw_last_inst, BRW_OPCODE_SENDC);
+   }
 
    brw_inst_set_eot(p->devinfo, brw_last_inst, inst->eot);
-   if (inst->check_tdr)
-      brw_inst_set_opcode(p->devinfo, brw_last_inst, BRW_OPCODE_SENDC);
 }
 
 void