i965: Fix intel_miptree_is_fast_clear_capable()
[mesa.git] / src / mesa / drivers / dri / i965 / brw_eu_emit.c
index 92d97dec8249eef20f280cc905239fb1432c0513..dc699bb63210384bebef1caefa156ed1ab48c06a 100644 (file)
@@ -95,7 +95,7 @@ brw_reg_type_to_hw_type(const struct brw_device_info *devinfo,
                         enum brw_reg_type type, unsigned file)
 {
    if (file == BRW_IMMEDIATE_VALUE) {
-      const static int imm_hw_types[] = {
+      static const int imm_hw_types[] = {
          [BRW_REGISTER_TYPE_UD] = BRW_HW_REG_TYPE_UD,
          [BRW_REGISTER_TYPE_D]  = BRW_HW_REG_TYPE_D,
          [BRW_REGISTER_TYPE_UW] = BRW_HW_REG_TYPE_UW,
@@ -117,7 +117,7 @@ brw_reg_type_to_hw_type(const struct brw_device_info *devinfo,
       return imm_hw_types[type];
    } else {
       /* Non-immediate registers */
-      const static int hw_types[] = {
+      static const int hw_types[] = {
          [BRW_REGISTER_TYPE_UD] = BRW_HW_REG_TYPE_UD,
          [BRW_REGISTER_TYPE_D]  = BRW_HW_REG_TYPE_D,
          [BRW_REGISTER_TYPE_UW] = BRW_HW_REG_TYPE_UW,
@@ -146,8 +146,9 @@ brw_set_dest(struct brw_codegen *p, brw_inst *inst, struct brw_reg dest)
 {
    const struct brw_device_info *devinfo = p->devinfo;
 
-   if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
-       dest.file != BRW_MESSAGE_REGISTER_FILE)
+   if (dest.file == BRW_MESSAGE_REGISTER_FILE)
+      assert((dest.nr & ~(1 << 7)) < BRW_MAX_MRF(devinfo->gen));
+   else if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE)
       assert(dest.nr < 128);
 
    gen7_convert_mrf_to_grf(p, &dest);
@@ -235,6 +236,15 @@ validate_reg(const struct brw_device_info *devinfo,
        reg.file == BRW_ARF_NULL)
       return;
 
+   /* From the IVB PRM Vol. 4, Pt. 3, Section 3.3.3.5:
+    *
+    *    "Swizzling is not allowed when an accumulator is used as an implicit
+    *    source or an explicit source in an instruction."
+    */
+   if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE &&
+       reg.nr == BRW_ARF_ACCUMULATOR)
+      assert(reg.dw1.bits.swizzle == BRW_SWIZZLE_XYZW);
+
    assert(reg.hstride >= 0 && reg.hstride < ARRAY_SIZE(hstride_for_reg));
    hstride = hstride_for_reg[reg.hstride];
 
@@ -300,7 +310,9 @@ brw_set_src0(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg)
 {
    const struct brw_device_info *devinfo = p->devinfo;
 
-   if (reg.file != BRW_ARCHITECTURE_REGISTER_FILE)
+   if (reg.file == BRW_MESSAGE_REGISTER_FILE)
+      assert((reg.nr & ~(1 << 7)) < BRW_MAX_MRF(devinfo->gen));
+   else if (reg.file != BRW_ARCHITECTURE_REGISTER_FILE)
       assert(reg.nr < 128);
 
    gen7_convert_mrf_to_grf(p, &reg);
@@ -443,6 +455,14 @@ brw_set_src1(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg)
    if (reg.file != BRW_ARCHITECTURE_REGISTER_FILE)
       assert(reg.nr < 128);
 
+   /* From the IVB PRM Vol. 4, Pt. 3, Section 3.3.3.5:
+    *
+    *    "Accumulator registers may be accessed explicitly as src0
+    *    operands only."
+    */
+   assert(reg.file != BRW_ARCHITECTURE_REGISTER_FILE ||
+          reg.nr != BRW_ARF_ACCUMULATOR);
+
    gen7_convert_mrf_to_grf(p, &reg);
    assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
 
@@ -914,6 +934,8 @@ brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest,
          brw_inst_set_3src_src_type(devinfo, inst, BRW_3SRC_TYPE_UD);
          brw_inst_set_3src_dst_type(devinfo, inst, BRW_3SRC_TYPE_UD);
          break;
+      default:
+         unreachable("not reached");
       }
    }
 
@@ -1582,8 +1604,8 @@ brw_ENDIF(struct brw_codegen *p)
    }
 
    if (devinfo->gen < 6) {
-      brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
-      brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
+      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
       brw_set_src1(p, insn, brw_imm_d(0x0));
    } else if (devinfo->gen == 6) {
       brw_set_dest(p, insn, brw_imm_w(0));
@@ -2463,7 +2485,7 @@ void brw_urb_WRITE(struct brw_codegen *p,
 
    insn = next_insn(p, BRW_OPCODE_SEND);
 
-   assert(msg_length < BRW_MAX_MRF);
+   assert(msg_length < BRW_MAX_MRF(devinfo->gen));
 
    brw_set_dest(p, insn, dest);
    brw_set_src0(p, insn, src0);
@@ -3114,6 +3136,76 @@ brw_typed_surface_write(struct brw_codegen *p,
       p, insn, num_channels);
 }
 
+static void
+brw_set_memory_fence_message(struct brw_codegen *p,
+                             struct brw_inst *insn,
+                             enum brw_message_target sfid,
+                             bool commit_enable)
+{
+   const struct brw_device_info *devinfo = p->devinfo;
+
+   brw_set_message_descriptor(p, insn, sfid,
+                              1 /* message length */,
+                              (commit_enable ? 1 : 0) /* response length */,
+                              true /* header present */,
+                              false);
+
+   switch (sfid) {
+   case GEN6_SFID_DATAPORT_RENDER_CACHE:
+      brw_inst_set_dp_msg_type(devinfo, insn, GEN7_DATAPORT_RC_MEMORY_FENCE);
+      break;
+   case GEN7_SFID_DATAPORT_DATA_CACHE:
+      brw_inst_set_dp_msg_type(devinfo, insn, GEN7_DATAPORT_DC_MEMORY_FENCE);
+      break;
+   default:
+      unreachable("Not reached");
+   }
+
+   if (commit_enable)
+      brw_inst_set_dp_msg_control(devinfo, insn, 1 << 5);
+}
+
+void
+brw_memory_fence(struct brw_codegen *p,
+                 struct brw_reg dst)
+{
+   const struct brw_device_info *devinfo = p->devinfo;
+   const bool commit_enable = devinfo->gen == 7 && !devinfo->is_haswell;
+   struct brw_inst *insn;
+
+   /* Set dst as destination for dependency tracking, the MEMORY_FENCE
+    * message doesn't write anything back.
+    */
+   insn = next_insn(p, BRW_OPCODE_SEND);
+   brw_set_dest(p, insn, dst);
+   brw_set_src0(p, insn, dst);
+   brw_set_memory_fence_message(p, insn, GEN7_SFID_DATAPORT_DATA_CACHE,
+                                commit_enable);
+
+   if (devinfo->gen == 7 && !devinfo->is_haswell) {
+      /* IVB does typed surface access through the render cache, so we need to
+       * flush it too.  Use a different register so both flushes can be
+       * pipelined by the hardware.
+       */
+      insn = next_insn(p, BRW_OPCODE_SEND);
+      brw_set_dest(p, insn, offset(dst, 1));
+      brw_set_src0(p, insn, offset(dst, 1));
+      brw_set_memory_fence_message(p, insn, GEN6_SFID_DATAPORT_RENDER_CACHE,
+                                   commit_enable);
+
+      /* Now write the response of the second message into the response of the
+       * first to trigger a pipeline stall -- This way future render and data
+       * cache messages will be properly ordered with respect to past data and
+       * render cache messages.
+       */
+      brw_push_insn_state(p);
+      brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
+      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
+      brw_MOV(p, dst, offset(dst, 1));
+      brw_pop_insn_state(p);
+   }
+}
+
 void
 brw_pixel_interpolator_query(struct brw_codegen *p,
                              struct brw_reg dest,
@@ -3142,6 +3234,153 @@ brw_pixel_interpolator_query(struct brw_codegen *p,
    brw_inst_set_pi_message_data(devinfo, insn, data);
 }
 
+void
+brw_find_live_channel(struct brw_codegen *p, struct brw_reg dst)
+{
+   const struct brw_device_info *devinfo = p->devinfo;
+   brw_inst *inst;
+
+   assert(devinfo->gen >= 7);
+
+   brw_push_insn_state(p);
+
+   if (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1) {
+      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
+
+      if (devinfo->gen >= 8) {
+         /* Getting the first active channel index is easy on Gen8: Just find
+          * the first bit set in the mask register.  The same register exists
+          * on HSW already but it reads back as all ones when the current
+          * instruction has execution masking disabled, so it's kind of
+          * useless.
+          */
+         inst = brw_FBL(p, vec1(dst),
+                        retype(brw_mask_reg(0), BRW_REGISTER_TYPE_UD));
+
+         /* Quarter control has the effect of magically shifting the value of
+          * this register.  Make sure it's set to zero.
+          */
+         brw_inst_set_qtr_control(devinfo, inst, GEN6_COMPRESSION_1Q);
+      } else {
+         const struct brw_reg flag = retype(brw_flag_reg(1, 0),
+                                            BRW_REGISTER_TYPE_UD);
+
+         brw_MOV(p, flag, brw_imm_ud(0));
+
+         /* Run a 16-wide instruction returning zero with execution masking
+          * and a conditional modifier enabled in order to get the current
+          * execution mask in f1.0.
+          */
+         inst = brw_MOV(p, brw_null_reg(), brw_imm_ud(0));
+         brw_inst_set_exec_size(devinfo, inst, BRW_EXECUTE_16);
+         brw_inst_set_mask_control(devinfo, inst, BRW_MASK_ENABLE);
+         brw_inst_set_cond_modifier(devinfo, inst, BRW_CONDITIONAL_Z);
+         brw_inst_set_flag_reg_nr(devinfo, inst, 1);
+
+         brw_FBL(p, vec1(dst), flag);
+      }
+   } else {
+      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
+
+      if (devinfo->gen >= 8) {
+         /* In SIMD4x2 mode the first active channel index is just the
+          * negation of the first bit of the mask register.
+          */
+         inst = brw_AND(p, brw_writemask(dst, WRITEMASK_X),
+                        negate(retype(brw_mask_reg(0), BRW_REGISTER_TYPE_UD)),
+                        brw_imm_ud(1));
+
+      } else {
+         /* Overwrite the destination without and with execution masking to
+          * find out which of the channels is active.
+          */
+         brw_MOV(p, brw_writemask(vec4(dst), WRITEMASK_X),
+                 brw_imm_ud(1));
+
+         inst = brw_MOV(p, brw_writemask(vec4(dst), WRITEMASK_X),
+                        brw_imm_ud(0));
+         brw_inst_set_mask_control(devinfo, inst, BRW_MASK_ENABLE);
+      }
+   }
+
+   brw_pop_insn_state(p);
+}
+
+void
+brw_broadcast(struct brw_codegen *p,
+              struct brw_reg dst,
+              struct brw_reg src,
+              struct brw_reg idx)
+{
+   const struct brw_device_info *devinfo = p->devinfo;
+   const bool align1 = brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1;
+   brw_inst *inst;
+
+   assert(src.file == BRW_GENERAL_REGISTER_FILE &&
+          src.address_mode == BRW_ADDRESS_DIRECT);
+
+   if ((src.vstride == 0 && (src.hstride == 0 || !align1)) ||
+       idx.file == BRW_IMMEDIATE_VALUE) {
+      /* Trivial, the source is already uniform or the index is a constant.
+       * We will typically not get here if the optimizer is doing its job, but
+       * asserting would be mean.
+       */
+      const unsigned i = idx.file == BRW_IMMEDIATE_VALUE ? idx.dw1.ud : 0;
+      brw_MOV(p, dst,
+              (align1 ? stride(suboffset(src, i), 0, 1, 0) :
+               stride(suboffset(src, 4 * i), 0, 4, 1)));
+   } else {
+      if (align1) {
+         const struct brw_reg addr =
+            retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD);
+         const unsigned offset = src.nr * REG_SIZE + src.subnr;
+         /* Limit in bytes of the signed indirect addressing immediate. */
+         const unsigned limit = 512;
+
+         brw_push_insn_state(p);
+         brw_set_default_mask_control(p, BRW_MASK_DISABLE);
+         brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
+
+         /* Take into account the component size and horizontal stride. */
+         assert(src.vstride == src.hstride + src.width);
+         brw_SHL(p, addr, vec1(idx),
+                 brw_imm_ud(_mesa_logbase2(type_sz(src.type)) +
+                            src.hstride - 1));
+
+         /* We can only address up to limit bytes using the indirect
+          * addressing immediate, account for the difference if the source
+          * register is above this limit.
+          */
+         if (offset >= limit)
+            brw_ADD(p, addr, addr, brw_imm_ud(offset - offset % limit));
+
+         brw_pop_insn_state(p);
+
+         /* Use indirect addressing to fetch the specified component. */
+         brw_MOV(p, dst,
+                 retype(brw_vec1_indirect(addr.subnr, offset % limit),
+                        src.type));
+      } else {
+         /* In SIMD4x2 mode the index can be either zero or one, replicate it
+          * to all bits of a flag register,
+          */
+         inst = brw_MOV(p,
+                        brw_null_reg(),
+                        stride(brw_swizzle1(idx, 0), 0, 4, 1));
+         brw_inst_set_pred_control(devinfo, inst, BRW_PREDICATE_NONE);
+         brw_inst_set_cond_modifier(devinfo, inst, BRW_CONDITIONAL_NZ);
+         brw_inst_set_flag_reg_nr(devinfo, inst, 1);
+
+         /* and use predicated SEL to pick the right channel. */
+         inst = brw_SEL(p, dst,
+                        stride(suboffset(src, 4), 0, 4, 1),
+                        stride(src, 0, 4, 1));
+         brw_inst_set_pred_control(devinfo, inst, BRW_PREDICATE_NORMAL);
+         brw_inst_set_flag_reg_nr(devinfo, inst, 1);
+      }
+   }
+}
+
 /**
  * This instruction is generated as a single-channel align1 instruction by
  * both the VS and FS stages when using INTEL_DEBUG=shader_time.
@@ -3187,3 +3426,54 @@ void brw_shader_time_add(struct brw_codegen *p,
 
    brw_pop_insn_state(p);
 }
+
+
+/**
+ * Emit the SEND message for a barrier
+ */
+void
+brw_barrier(struct brw_codegen *p, struct brw_reg src)
+{
+   const struct brw_device_info *devinfo = p->devinfo;
+   struct brw_inst *inst;
+
+   assert(devinfo->gen >= 7);
+
+   inst = next_insn(p, BRW_OPCODE_SEND);
+   brw_set_dest(p, inst, brw_null_reg());
+   brw_set_src0(p, inst, src);
+   brw_set_src1(p, inst, brw_null_reg());
+
+   brw_set_message_descriptor(p, inst, BRW_SFID_MESSAGE_GATEWAY,
+                              1 /* msg_length */,
+                              0 /* response_length */,
+                              false /* header_present */,
+                              false /* end_of_thread */);
+
+   brw_inst_set_gateway_notify(devinfo, inst, 1);
+   brw_inst_set_gateway_subfuncid(devinfo, inst,
+                                  BRW_MESSAGE_GATEWAY_SFID_BARRIER_MSG);
+
+   brw_inst_set_mask_control(devinfo, inst, BRW_MASK_DISABLE);
+}
+
+
+/**
+ * Emit the wait instruction for a barrier
+ */
+void
+brw_WAIT(struct brw_codegen *p)
+{
+   const struct brw_device_info *devinfo = p->devinfo;
+   struct brw_inst *insn;
+
+   struct brw_reg src = brw_notification_reg();
+
+   insn = next_insn(p, BRW_OPCODE_WAIT);
+   brw_set_dest(p, insn, src);
+   brw_set_src0(p, insn, src);
+   brw_set_src1(p, insn, brw_null_reg());
+
+   brw_inst_set_exec_size(devinfo, insn, BRW_EXECUTE_1);
+   brw_inst_set_mask_control(devinfo, insn, BRW_MASK_DISABLE);
+}