intel/compiler: Relax SENDS regioning assertions
[mesa.git] / src / intel / compiler / brw_eu_emit.c
index c3debfe4d3dee2b1f538ed7f9c283eb284b12c4a..0d5c755f9e8d36e9ee1b7db70af4b2b63fc9aed6 100644 (file)
@@ -95,14 +95,15 @@ brw_set_dest(struct brw_codegen *p, brw_inst *inst, struct brw_reg dest)
    else if (dest.file == BRW_GENERAL_REGISTER_FILE)
       assert(dest.nr < 128);
 
-   /* The hardware has a restriction where if the destination is Byte,
-    * the instruction needs to have a stride of 2 (except for packed byte
-    * MOV). This seems to be required even if the destination is the NULL
-    * register.
+   /* The hardware has a restriction where a destination of size Byte with
+    * a stride of 1 is only allowed for a packed byte MOV. For any other
+    * instruction, the stride must be at least 2, even when the destination
+    * is the NULL register.
     */
    if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
        dest.nr == BRW_ARF_NULL &&
-       type_sz(dest.type) == 1) {
+       type_sz(dest.type) == 1 &&
+       dest.hstride == BRW_HORIZONTAL_STRIDE_1) {
       dest.hstride = BRW_HORIZONTAL_STRIDE_2;
    }
 
@@ -236,7 +237,7 @@ brw_set_src0(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg)
       assert(reg.file != BRW_IMMEDIATE_VALUE);
       assert(reg.address_mode == BRW_ADDRESS_DIRECT);
       assert(reg.subnr == 0);
-      assert(brw_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1 ||
+      assert(has_scalar_region(reg) ||
              (reg.hstride == BRW_HORIZONTAL_STRIDE_1 &&
               reg.vstride == reg.width + 1));
       assert(!reg.negate && !reg.abs);
@@ -248,8 +249,9 @@ brw_set_src0(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg)
       assert(reg.file == BRW_GENERAL_REGISTER_FILE);
       assert(reg.address_mode == BRW_ADDRESS_DIRECT);
       assert(reg.subnr % 16 == 0);
-      assert(reg.hstride == BRW_HORIZONTAL_STRIDE_1 &&
-             reg.vstride == reg.width + 1);
+      assert(has_scalar_region(reg) ||
+             (reg.hstride == BRW_HORIZONTAL_STRIDE_1 &&
+              reg.vstride == reg.width + 1));
       assert(!reg.negate && !reg.abs);
       brw_inst_set_src0_da_reg_nr(devinfo, inst, reg.nr);
       brw_inst_set_src0_da16_subreg_nr(devinfo, inst, reg.subnr / 16);
@@ -356,7 +358,7 @@ brw_set_src1(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg)
              reg.file == BRW_ARCHITECTURE_REGISTER_FILE);
       assert(reg.address_mode == BRW_ADDRESS_DIRECT);
       assert(reg.subnr == 0);
-      assert(brw_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1 ||
+      assert(has_scalar_region(reg) ||
              (reg.hstride == BRW_HORIZONTAL_STRIDE_1 &&
               reg.vstride == reg.width + 1));
       assert(!reg.negate && !reg.abs);
@@ -592,7 +594,7 @@ gen7_set_dp_scratch_message(struct brw_codegen *p,
    const struct gen_device_info *devinfo = p->devinfo;
    assert(num_regs == 1 || num_regs == 2 || num_regs == 4 ||
           (devinfo->gen >= 8 && num_regs == 8));
-   const unsigned block_size = (devinfo->gen >= 8 ? _mesa_logbase2(num_regs) :
+   const unsigned block_size = (devinfo->gen >= 8 ? util_logbase2(num_regs) :
                                 num_regs - 1);
 
    brw_set_desc(p, inst, brw_message_desc(
@@ -848,9 +850,19 @@ brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest,
              src2.file == BRW_IMMEDIATE_VALUE);
 
       if (devinfo->gen >= 12) {
-         brw_inst_set_3src_a1_src0_reg_file(devinfo, inst, src0.file);
+         if (src0.file == BRW_IMMEDIATE_VALUE) {
+            brw_inst_set_3src_a1_src0_is_imm(devinfo, inst, 1);
+         } else {
+            brw_inst_set_3src_a1_src0_reg_file(devinfo, inst, src0.file);
+         }
+
          brw_inst_set_3src_a1_src1_reg_file(devinfo, inst, src1.file);
-         brw_inst_set_3src_a1_src2_reg_file(devinfo, inst, src2.file);
+
+         if (src2.file == BRW_IMMEDIATE_VALUE) {
+            brw_inst_set_3src_a1_src2_is_imm(devinfo, inst, 1);
+         } else {
+            brw_inst_set_3src_a1_src2_reg_file(devinfo, inst, src2.file);
+         }
       } else {
          brw_inst_set_3src_a1_src0_reg_file(devinfo, inst,
                                             src0.file == BRW_GENERAL_REGISTER_FILE ?
@@ -879,7 +891,7 @@ brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest,
                                             dest.file == BRW_MESSAGE_REGISTER_FILE);
       }
       brw_inst_set_3src_dst_reg_nr(devinfo, inst, dest.nr);
-      brw_inst_set_3src_a16_dst_subreg_nr(devinfo, inst, dest.subnr / 16);
+      brw_inst_set_3src_a16_dst_subreg_nr(devinfo, inst, dest.subnr / 4);
       brw_inst_set_3src_a16_dst_writemask(devinfo, inst, dest.writemask);
 
       assert(src0.file == BRW_GENERAL_REGISTER_FILE);
@@ -1009,33 +1021,6 @@ brw_inst *brw_##OP(struct brw_codegen *p,         \
    return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2); \
 }
 
-/* Rounding operations (other than RNDD) require two instructions - the first
- * stores a rounded value (possibly the wrong way) in the dest register, but
- * also sets a per-channel "increment bit" in the flag register.  A predicated
- * add of 1.0 fixes dest to contain the desired result.
- *
- * Sandybridge and later appear to round correctly without an ADD.
- */
-#define ROUND(OP)                                                            \
-void brw_##OP(struct brw_codegen *p,                                         \
-             struct brw_reg dest,                                            \
-             struct brw_reg src)                                             \
-{                                                                            \
-   const struct gen_device_info *devinfo = p->devinfo;                                       \
-   brw_inst *rnd, *add;                                                              \
-   rnd = next_insn(p, BRW_OPCODE_##OP);                                              \
-   brw_set_dest(p, rnd, dest);                                               \
-   brw_set_src0(p, rnd, src);                                                \
-                                                                             \
-   if (devinfo->gen < 6) {                                                           \
-      /* turn on round-increments */                                         \
-      brw_inst_set_cond_modifier(devinfo, rnd, BRW_CONDITIONAL_R);            \
-      add = brw_ADD(p, dest, dest, brw_imm_f(1.0f));                         \
-      brw_inst_set_pred_control(devinfo, add, BRW_PREDICATE_NORMAL);          \
-   }                                                                         \
-}
-
-
 ALU2(SEL)
 ALU1(NOT)
 ALU2(AND)
@@ -1050,6 +1035,9 @@ ALU2(ROR)
 ALU3(CSEL)
 ALU1(FRC)
 ALU1(RNDD)
+ALU1(RNDE)
+ALU1(RNDU)
+ALU1(RNDZ)
 ALU2(MAC)
 ALU2(MACH)
 ALU1(LZD)
@@ -1069,9 +1057,6 @@ ALU1(CBIT)
 ALU2(ADDC)
 ALU2(SUBB)
 
-ROUND(RNDZ)
-ROUND(RNDE)
-
 brw_inst *
 brw_MOV(struct brw_codegen *p, struct brw_reg dest, struct brw_reg src0)
 {
@@ -1742,14 +1727,23 @@ brw_CONT(struct brw_codegen *p)
 }
 
 brw_inst *
-gen6_HALT(struct brw_codegen *p)
+brw_HALT(struct brw_codegen *p)
 {
    const struct gen_device_info *devinfo = p->devinfo;
    brw_inst *insn;
 
    insn = next_insn(p, BRW_OPCODE_HALT);
    brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
-   if (devinfo->gen < 8) {
+   if (devinfo->gen < 6) {
+      /* From the Gen4 PRM:
+       *
+       *    "IP register must be put (for example, by the assembler) at <dst>
+       *    and <src0> locations.
+       */
+      brw_set_dest(p, insn, brw_ip_reg());
+      brw_set_src0(p, insn, brw_ip_reg());
+      brw_set_src1(p, insn, brw_imm_d(0x0)); /* exitcode updated later. */
+   } else if (devinfo->gen < 8) {
       brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
       brw_set_src1(p, insn, brw_imm_d(0x0)); /* UIP and JIP, updated later. */
    } else if (devinfo->gen < 12) {
@@ -2694,7 +2688,7 @@ brw_send_indirect_split_message(struct brw_codegen *p,
    }
 
    if (ex_desc.file == BRW_IMMEDIATE_VALUE &&
-       (ex_desc.ud & INTEL_MASK(15, 12)) == 0) {
+       (devinfo->gen >= 12 || (ex_desc.ud & INTEL_MASK(15, 12)) == 0)) {
       ex_desc.ud |= ex_desc_imm;
    } else {
       const struct tgl_swsb swsb = brw_get_default_swsb(p);
@@ -2720,8 +2714,9 @@ brw_send_indirect_split_message(struct brw_codegen *p,
       unsigned imm_part = ex_desc_imm | sfid | eot << 5;
 
       if (ex_desc.file == BRW_IMMEDIATE_VALUE) {
-         /* ex_desc bits 15:12 don't exist in the instruction encoding, so
-          * we may have fallen back to an indirect extended descriptor.
+         /* ex_desc bits 15:12 don't exist in the instruction encoding prior
+          * to Gen12, so we may have fallen back to an indirect extended
+          * descriptor.
           */
          brw_MOV(p, addr, brw_imm_ud(ex_desc.ud | imm_part));
       } else {
@@ -3166,57 +3161,24 @@ brw_memory_fence(struct brw_codegen *p,
                  struct brw_reg dst,
                  struct brw_reg src,
                  enum opcode send_op,
-                 bool stall,
+                 enum brw_message_target sfid,
+                 bool commit_enable,
                  unsigned bti)
 {
    const struct gen_device_info *devinfo = p->devinfo;
-   const bool commit_enable = stall ||
-      devinfo->gen >= 10 || /* HSD ES # 1404612949 */
-      (devinfo->gen == 7 && !devinfo->is_haswell);
-   struct brw_inst *insn;
 
-   brw_push_insn_state(p);
-   brw_set_default_mask_control(p, BRW_MASK_DISABLE);
-   brw_set_default_exec_size(p, BRW_EXECUTE_1);
    dst = retype(vec1(dst), BRW_REGISTER_TYPE_UW);
    src = retype(vec1(src), BRW_REGISTER_TYPE_UD);
 
    /* Set dst as destination for dependency tracking, the MEMORY_FENCE
     * message doesn't write anything back.
     */
-   insn = next_insn(p, send_op);
+   struct brw_inst *insn = next_insn(p, send_op);
+   brw_inst_set_mask_control(devinfo, insn, BRW_MASK_DISABLE);
+   brw_inst_set_exec_size(devinfo, insn, BRW_EXECUTE_1);
    brw_set_dest(p, insn, dst);
    brw_set_src0(p, insn, src);
-   brw_set_memory_fence_message(p, insn, GEN7_SFID_DATAPORT_DATA_CACHE,
-                                commit_enable, bti);
-
-   if (devinfo->gen == 7 && !devinfo->is_haswell) {
-      /* IVB does typed surface access through the render cache, so we need to
-       * flush it too.  Use a different register so both flushes can be
-       * pipelined by the hardware.
-       */
-      insn = next_insn(p, send_op);
-      brw_set_dest(p, insn, offset(dst, 1));
-      brw_set_src0(p, insn, src);
-      brw_set_memory_fence_message(p, insn, GEN6_SFID_DATAPORT_RENDER_CACHE,
-                                   commit_enable, bti);
-
-      /* Now write the response of the second message into the response of the
-       * first to trigger a pipeline stall -- This way future render and data
-       * cache messages will be properly ordered with respect to past data and
-       * render cache messages.
-       */
-      brw_MOV(p, dst, offset(dst, 1));
-   }
-
-   if (stall) {
-      brw_set_default_swsb(p, tgl_swsb_sbid(TGL_SBID_DST,
-                                            brw_get_default_swsb(p).sbid));
-
-      brw_MOV(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UW), dst);
-   }
-
-   brw_pop_insn_state(p);
+   brw_set_memory_fence_message(p, insn, sfid, commit_enable, bti);
 }
 
 void
@@ -3431,7 +3393,7 @@ brw_broadcast(struct brw_codegen *p,
          /* Take into account the component size and horizontal stride. */
          assert(src.vstride == src.hstride + src.width);
          brw_SHL(p, addr, vec1(idx),
-                 brw_imm_ud(_mesa_logbase2(type_sz(src.type)) +
+                 brw_imm_ud(util_logbase2(type_sz(src.type)) +
                             src.hstride - 1));
 
          /* We can only address up to limit bytes using the indirect