assert(reg.file != BRW_IMMEDIATE_VALUE);
assert(reg.address_mode == BRW_ADDRESS_DIRECT);
assert(reg.subnr == 0);
- assert(brw_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1 ||
+ assert(has_scalar_region(reg) ||
(reg.hstride == BRW_HORIZONTAL_STRIDE_1 &&
reg.vstride == reg.width + 1));
assert(!reg.negate && !reg.abs);
assert(reg.file == BRW_GENERAL_REGISTER_FILE);
assert(reg.address_mode == BRW_ADDRESS_DIRECT);
assert(reg.subnr % 16 == 0);
- assert(reg.hstride == BRW_HORIZONTAL_STRIDE_1 &&
- reg.vstride == reg.width + 1);
+ assert(has_scalar_region(reg) ||
+ (reg.hstride == BRW_HORIZONTAL_STRIDE_1 &&
+ reg.vstride == reg.width + 1));
assert(!reg.negate && !reg.abs);
brw_inst_set_src0_da_reg_nr(devinfo, inst, reg.nr);
brw_inst_set_src0_da16_subreg_nr(devinfo, inst, reg.subnr / 16);
reg.file == BRW_ARCHITECTURE_REGISTER_FILE);
assert(reg.address_mode == BRW_ADDRESS_DIRECT);
assert(reg.subnr == 0);
- assert(brw_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1 ||
+ assert(has_scalar_region(reg) ||
(reg.hstride == BRW_HORIZONTAL_STRIDE_1 &&
reg.vstride == reg.width + 1));
assert(!reg.negate && !reg.abs);
const struct gen_device_info *devinfo = p->devinfo;
assert(num_regs == 1 || num_regs == 2 || num_regs == 4 ||
(devinfo->gen >= 8 && num_regs == 8));
- const unsigned block_size = (devinfo->gen >= 8 ? _mesa_logbase2(num_regs) :
+ const unsigned block_size = (devinfo->gen >= 8 ? util_logbase2(num_regs) :
num_regs - 1);
brw_set_desc(p, inst, brw_message_desc(
brw_inst_set_acc_wr_control(devinfo, insn, state->acc_wr_control);
}
-#define next_insn brw_next_insn
-brw_inst *
-brw_next_insn(struct brw_codegen *p, unsigned opcode)
+/* Reserve room for nr_insn instructions in the program store, first
+ * aligning the insertion point to `align` bytes (must be a power of two
+ * or zero).  Grows the store as needed, zero-fills any alignment padding
+ * so hashing/caching of the program is deterministic, and returns a
+ * pointer to the first reserved instruction slot.
+ */
+static brw_inst *
+brw_append_insns(struct brw_codegen *p, unsigned nr_insn, unsigned align)
{
-   const struct gen_device_info *devinfo = p->devinfo;
-   brw_inst *insn;
+   assert(util_is_power_of_two_or_zero(sizeof(brw_inst)));
+   assert(util_is_power_of_two_or_zero(align));
+   /* Convert the byte alignment into whole instructions (at least 1). */
+   const unsigned align_insn = MAX2(align / sizeof(brw_inst), 1);
+   const unsigned start_insn = ALIGN(p->nr_insn, align_insn);
+   const unsigned new_nr_insn = start_insn + nr_insn;
-   if (p->nr_insn + 1 > p->store_size) {
-      p->store_size <<= 1;
+   if (p->store_size < new_nr_insn) {
+      p->store_size = util_next_power_of_two(new_nr_insn * sizeof(brw_inst));
      p->store = reralloc(p->mem_ctx, p->store, brw_inst, p->store_size);
   }
-   p->next_insn_offset += 16;
-   insn = &p->store[p->nr_insn++];
+   /* Memset any padding due to alignment to 0. We don't want to be hashing
+    * or caching a bunch of random bits we got from a memory allocation.
+    */
+   if (p->nr_insn < start_insn) {
+      memset(&p->store[p->nr_insn], 0,
+             (start_insn - p->nr_insn) * sizeof(brw_inst));
+   }
+
+   /* next_insn_offset must always track nr_insn in bytes. */
+   assert(p->next_insn_offset == p->nr_insn * sizeof(brw_inst));
+   p->nr_insn = new_nr_insn;
+   p->next_insn_offset = new_nr_insn * sizeof(brw_inst);
+
+   return &p->store[start_insn];
+}
+
+/* Align the next instruction offset to `align` bytes without emitting any
+ * instructions; the skipped padding is zero-filled by brw_append_insns().
+ */
+void
+brw_realign(struct brw_codegen *p, unsigned align)
+{
+   brw_append_insns(p, 0, align);
+}
+
+/* Copy `size` bytes of arbitrary data into the instruction stream at the
+ * requested byte alignment, zero-padding any tail of the last partially
+ * used instruction slot.  Returns the byte offset of the data from the
+ * start of the program store.
+ */
+int
+brw_append_data(struct brw_codegen *p, void *data,
+                unsigned size, unsigned align)
+{
+   unsigned nr_insn = DIV_ROUND_UP(size, sizeof(brw_inst));
+   void *dst = brw_append_insns(p, nr_insn, align);
+   memcpy(dst, data, size);
+
+   /* If it's not a whole number of instructions, memset the end */
+   if (size < nr_insn * sizeof(brw_inst))
+      memset(dst + size, 0, nr_insn * sizeof(brw_inst) - size);
+
+   return dst - (void *)p->store;
+}
+
+#define next_insn brw_next_insn
+brw_inst *
+brw_next_insn(struct brw_codegen *p, unsigned opcode)
+{
+ const struct gen_device_info *devinfo = p->devinfo;
+ brw_inst *insn = brw_append_insns(p, 1, sizeof(brw_inst));
memset(insn, 0, sizeof(*insn));
brw_inst_set_opcode(devinfo, insn, opcode);
dest.file == BRW_MESSAGE_REGISTER_FILE);
}
brw_inst_set_3src_dst_reg_nr(devinfo, inst, dest.nr);
- brw_inst_set_3src_a16_dst_subreg_nr(devinfo, inst, dest.subnr / 16);
+ brw_inst_set_3src_a16_dst_subreg_nr(devinfo, inst, dest.subnr / 4);
brw_inst_set_3src_a16_dst_writemask(devinfo, inst, dest.writemask);
assert(src0.file == BRW_GENERAL_REGISTER_FILE);
ALU1(FRC)
ALU1(RNDD)
ALU1(RNDE)
+ALU1(RNDU)
ALU1(RNDZ)
ALU2(MAC)
ALU2(MACH)
}
brw_inst *
-gen6_HALT(struct brw_codegen *p)
+brw_HALT(struct brw_codegen *p)
{
const struct gen_device_info *devinfo = p->devinfo;
brw_inst *insn;
insn = next_insn(p, BRW_OPCODE_HALT);
brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
- if (devinfo->gen < 8) {
+ if (devinfo->gen < 6) {
+ /* From the Gen4 PRM:
+ *
+ * "IP register must be put (for example, by the assembler) at <dst>
+ * and <src0> locations.
+ */
+ brw_set_dest(p, insn, brw_ip_reg());
+ brw_set_src0(p, insn, brw_ip_reg());
+ brw_set_src1(p, insn, brw_imm_d(0x0)); /* exitcode updated later. */
+ } else if (devinfo->gen < 8) {
brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
brw_set_src1(p, insn, brw_imm_d(0x0)); /* UIP and JIP, updated later. */
} else if (devinfo->gen < 12) {
struct brw_reg dst,
struct brw_reg src,
enum opcode send_op,
- bool stall,
+ enum brw_message_target sfid,
+ bool commit_enable,
unsigned bti)
{
const struct gen_device_info *devinfo = p->devinfo;
- const bool commit_enable = stall ||
- devinfo->gen >= 10 || /* HSD ES # 1404612949 */
- (devinfo->gen == 7 && !devinfo->is_haswell);
- struct brw_inst *insn;
- brw_push_insn_state(p);
- brw_set_default_mask_control(p, BRW_MASK_DISABLE);
- brw_set_default_exec_size(p, BRW_EXECUTE_1);
dst = retype(vec1(dst), BRW_REGISTER_TYPE_UW);
src = retype(vec1(src), BRW_REGISTER_TYPE_UD);
/* Set dst as destination for dependency tracking, the MEMORY_FENCE
* message doesn't write anything back.
*/
- insn = next_insn(p, send_op);
+ struct brw_inst *insn = next_insn(p, send_op);
+ brw_inst_set_mask_control(devinfo, insn, BRW_MASK_DISABLE);
+ brw_inst_set_exec_size(devinfo, insn, BRW_EXECUTE_1);
brw_set_dest(p, insn, dst);
brw_set_src0(p, insn, src);
- brw_set_memory_fence_message(p, insn, GEN7_SFID_DATAPORT_DATA_CACHE,
- commit_enable, bti);
-
- if (devinfo->gen == 7 && !devinfo->is_haswell) {
- /* IVB does typed surface access through the render cache, so we need to
- * flush it too. Use a different register so both flushes can be
- * pipelined by the hardware.
- */
- insn = next_insn(p, send_op);
- brw_set_dest(p, insn, offset(dst, 1));
- brw_set_src0(p, insn, src);
- brw_set_memory_fence_message(p, insn, GEN6_SFID_DATAPORT_RENDER_CACHE,
- commit_enable, bti);
-
- /* Now write the response of the second message into the response of the
- * first to trigger a pipeline stall -- This way future render and data
- * cache messages will be properly ordered with respect to past data and
- * render cache messages.
- */
- brw_MOV(p, dst, offset(dst, 1));
- }
-
- if (stall) {
- brw_set_default_swsb(p, tgl_swsb_sbid(TGL_SBID_DST,
- brw_get_default_swsb(p).sbid));
-
- brw_MOV(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UW), dst);
- }
-
- brw_pop_insn_state(p);
+ brw_set_memory_fence_message(p, insn, sfid, commit_enable, bti);
}
void
* asserting would be mean.
*/
const unsigned i = idx.file == BRW_IMMEDIATE_VALUE ? idx.ud : 0;
- brw_MOV(p, dst,
- (align1 ? stride(suboffset(src, i), 0, 1, 0) :
- stride(suboffset(src, 4 * i), 0, 4, 1)));
+ src = align1 ? stride(suboffset(src, i), 0, 1, 0) :
+ stride(suboffset(src, 4 * i), 0, 4, 1);
+
+ if (type_sz(src.type) > 4 && !devinfo->has_64bit_float) {
+ brw_MOV(p, subscript(dst, BRW_REGISTER_TYPE_D, 0),
+ subscript(src, BRW_REGISTER_TYPE_D, 0));
+ brw_set_default_swsb(p, tgl_swsb_null());
+ brw_MOV(p, subscript(dst, BRW_REGISTER_TYPE_D, 1),
+ subscript(src, BRW_REGISTER_TYPE_D, 1));
+ } else {
+ brw_MOV(p, dst, src);
+ }
} else {
/* From the Haswell PRM section "Register Region Restrictions":
*
/* Take into account the component size and horizontal stride. */
assert(src.vstride == src.hstride + src.width);
brw_SHL(p, addr, vec1(idx),
- brw_imm_ud(_mesa_logbase2(type_sz(src.type)) +
+ brw_imm_ud(util_logbase2(type_sz(src.type)) +
src.hstride - 1));
/* We can only address up to limit bytes using the indirect
/* Use indirect addressing to fetch the specified component. */
if (type_sz(src.type) > 4 &&
- (devinfo->is_cherryview || gen_device_info_is_9lp(devinfo))) {
+ (devinfo->is_cherryview || gen_device_info_is_9lp(devinfo) ||
+ !devinfo->has_64bit_float)) {
/* From the Cherryview PRM Vol 7. "Register Region Restrictions":
*
* "When source or destination datatype is 64b or operation is