i965: Refactor SIMD16-to-2xSIMD8 checks.

[mesa.git] / src / mesa / drivers / dri / i965 / brw_eu_emit.c
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c

index 57161e282dc1142fa5d5d1087fbb601b069d9b80..1ca79a943046f4568ab4336aab897c21d7290397 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -249,21 +249,21 @@ validate_reg(const struct brw_context *brw, brw_inst *inst, struct brw_reg reg)
         reg.file == BRW_ARF_NULL)
        return;
  
-   assert(reg.hstride >= 0 && reg.hstride < Elements(hstride_for_reg));
+   assert(reg.hstride >= 0 && reg.hstride < ARRAY_SIZE(hstride_for_reg));
     hstride = hstride_for_reg[reg.hstride];
  
     if (reg.vstride == 0xf) {
        vstride = -1;
     } else {
-      assert(reg.vstride >= 0 && reg.vstride < Elements(vstride_for_reg));
+      assert(reg.vstride >= 0 && reg.vstride < ARRAY_SIZE(vstride_for_reg));
        vstride = vstride_for_reg[reg.vstride];
     }
  
-   assert(reg.width >= 0 && reg.width < Elements(width_for_reg));
+   assert(reg.width >= 0 && reg.width < ARRAY_SIZE(width_for_reg));
     width = width_for_reg[reg.width];
  
     assert(brw_inst_exec_size(brw, inst) >= 0 &&
-          brw_inst_exec_size(brw, inst) < Elements(execsize_for_reg));
+          brw_inst_exec_size(brw, inst) < ARRAY_SIZE(execsize_for_reg));
     execsize = execsize_for_reg[brw_inst_exec_size(brw, inst)];
  
     /* Restrictions from 3.3.10: Register Region Restrictions. */
@@ -1148,28 +1148,47 @@ brw_inst *
  brw_F32TO16(struct brw_compile *p, struct brw_reg dst, struct brw_reg src)
  {
     const struct brw_context *brw = p->brw;
-   bool align16 = brw_inst_access_mode(brw, p->current) == BRW_ALIGN_16;
+   const bool align16 = brw_inst_access_mode(brw, p->current) == BRW_ALIGN_16;
+   /* The F32TO16 instruction doesn't support 32-bit destination types in
+    * Align1 mode, and neither does the Gen8 implementation in terms of a
+    * converting MOV.  Gen7 does zero out the high 16 bits in Align16 mode as
+    * an undocumented feature.
+    */
+   const bool needs_zero_fill = (dst.type == BRW_REGISTER_TYPE_UD &&
+                                 (!align16 || brw->gen >= 8));
+   brw_inst *inst;
  
     if (align16) {
        assert(dst.type == BRW_REGISTER_TYPE_UD);
     } else {
-      assert(dst.type == BRW_REGISTER_TYPE_W ||
+      assert(dst.type == BRW_REGISTER_TYPE_UD ||
+             dst.type == BRW_REGISTER_TYPE_W ||
               dst.type == BRW_REGISTER_TYPE_UW ||
               dst.type == BRW_REGISTER_TYPE_HF);
     }
  
+   brw_push_insn_state(p);
+
+   if (needs_zero_fill) {
+      brw_set_default_access_mode(p, BRW_ALIGN_1);
+      dst = spread(retype(dst, BRW_REGISTER_TYPE_W), 2);
+   }
+
     if (brw->gen >= 8) {
-      if (align16) {
-         /* Emulate the Gen7 zeroing bug (see comments in vec4_visitor's
-          * emit_pack_half_2x16 method.)
-          */
-         brw_MOV(p, retype(dst, BRW_REGISTER_TYPE_UD), brw_imm_ud(0u));
-      }
-      return brw_MOV(p, retype(dst, BRW_REGISTER_TYPE_HF), src);
+      inst = brw_MOV(p, retype(dst, BRW_REGISTER_TYPE_HF), src);
     } else {
        assert(brw->gen == 7);
-      return brw_alu1(p, BRW_OPCODE_F32TO16, dst, src);
+      inst = brw_alu1(p, BRW_OPCODE_F32TO16, dst, src);
+   }
+
+   if (needs_zero_fill) {
+      brw_inst_set_no_dd_clear(brw, inst, true);
+      inst = brw_MOV(p, suboffset(dst, 1), brw_imm_ud(0u));
+      brw_inst_set_no_dd_check(brw, inst, true);
     }
+
+   brw_pop_insn_state(p);
+   return inst;
  }
  
  brw_inst *
@@ -1181,6 +1200,15 @@ brw_F16TO32(struct brw_compile *p, struct brw_reg dst, struct brw_reg src)
     if (align16) {
        assert(src.type == BRW_REGISTER_TYPE_UD);
     } else {
+      /* From the Ivybridge PRM, Vol4, Part3, Section 6.26 f16to32:
+       *
+       *   Because this instruction does not have a 16-bit floating-point
+       *   type, the source data type must be Word (W). The destination type
+       *   must be F (Float).
+       */
+      if (src.type == BRW_REGISTER_TYPE_UD)
+         src = spread(retype(src, BRW_REGISTER_TYPE_W), 2);
+
        assert(src.type == BRW_REGISTER_TYPE_W ||
               src.type == BRW_REGISTER_TYPE_UW ||
               src.type == BRW_REGISTER_TYPE_HF);
@@ -1304,7 +1332,7 @@ brw_IF(struct brw_compile *p, unsigned execute_size)
     } else if (brw->gen == 7) {
        brw_set_dest(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
        brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
-      brw_set_src1(p, insn, brw_imm_ud(0));
+      brw_set_src1(p, insn, brw_imm_w(0));
        brw_inst_set_jip(brw, insn, 0);
        brw_inst_set_uip(brw, insn, 0);
     } else {
@@ -1505,7 +1533,7 @@ brw_ELSE(struct brw_compile *p)
     } else if (brw->gen == 7) {
        brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
        brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
-      brw_set_src1(p, insn, brw_imm_ud(0));
+      brw_set_src1(p, insn, brw_imm_w(0));
        brw_inst_set_jip(brw, insn, 0);
        brw_inst_set_uip(brw, insn, 0);
     } else {
@@ -1582,7 +1610,7 @@ brw_ENDIF(struct brw_compile *p)
     } else if (brw->gen == 7) {
        brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
        brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
-      brw_set_src1(p, insn, brw_imm_ud(0));
+      brw_set_src1(p, insn, brw_imm_w(0));
     } else {
        brw_set_src0(p, insn, brw_imm_d(0));
     }
@@ -1774,7 +1802,7 @@ brw_WHILE(struct brw_compile *p)
        } else if (brw->gen == 7) {
           brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
           brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
-         brw_set_src1(p, insn, brw_imm_ud(0));
+         brw_set_src1(p, insn, brw_imm_w(0));
           brw_inst_set_jip(brw, insn, br * (do_insn - insn));
        } else {
           brw_set_dest(p, insn, brw_imm_w(0));
@@ -1849,14 +1877,6 @@ void brw_CMP(struct brw_compile *p,
     struct brw_context *brw = p->brw;
     brw_inst *insn = next_insn(p, BRW_OPCODE_CMP);
  
-   if (brw->gen >= 8) {
-      /* The CMP instruction appears to behave erratically for floating point
-       * sources unless the destination type is also float.  Overriding it to
-       * match src0 makes it work in all cases.
-       */
-      dest.type = src0.type;
-   }
-
     brw_inst_set_cond_modifier(brw, insn, conditional);
     brw_set_dest(p, insn, dest);
     brw_set_src0(p, insn, src0);
@@ -2272,6 +2292,7 @@ void brw_fb_WRITE(struct brw_compile *p,
                    unsigned msg_length,
                    unsigned response_length,
                    bool eot,
+                  bool last_render_target,
                    bool header_present)
  {
     struct brw_context *brw = p->brw;
@@ -2313,7 +2334,7 @@ void brw_fb_WRITE(struct brw_compile *p,
                             msg_type,
                             msg_length,
                             header_present,
-                           eot, /* last render target write */
+                           last_render_target,
                             response_length,
                             eot,
                             0 /* send_commit_msg */);