anv: add new gem/drm helpers

[mesa.git] / src / intel / compiler / brw_fs_lower_regioning.cpp
diff --git a/src/intel/compiler/brw_fs_lower_regioning.cpp b/src/intel/compiler/brw_fs_lower_regioning.cpp

index c60d47004191a542bf72ad6a0ebebae0ac6ca5b3..8ae52dfef96a6b038dca28b1a546d3d49bdccb9a 100644 (file)
--- a/src/intel/compiler/brw_fs_lower_regioning.cpp
+++ b/src/intel/compiler/brw_fs_lower_regioning.cpp
@@ -127,20 +127,37 @@ namespace {
     has_invalid_src_region(const gen_device_info *devinfo, const fs_inst *inst,
                            unsigned i)
     {
-      if (is_unordered(inst) || inst->is_control_source(i)) {
+      if (is_unordered(inst) || inst->is_control_source(i))
           return false;
-      } else {
-         const unsigned dst_byte_stride = inst->dst.stride * type_sz(inst->dst.type);
-         const unsigned src_byte_stride = inst->src[i].stride *
-            type_sz(inst->src[i].type);
-         const unsigned dst_byte_offset = reg_offset(inst->dst) % REG_SIZE;
-         const unsigned src_byte_offset = reg_offset(inst->src[i]) % REG_SIZE;
  
-         return has_dst_aligned_region_restriction(devinfo, inst) &&
-                !is_uniform(inst->src[i]) &&
-                (src_byte_stride != dst_byte_stride ||
-                 src_byte_offset != dst_byte_offset);
+      /* Empirical testing shows that Broadwell has a bug affecting half-float
+       * MAD instructions when any of their sources has a non-zero offset, such
+       * as:
+       *
+       * mad(8) g18<1>HF -g17<4,4,1>HF g14.8<4,4,1>HF g11<4,4,1>HF { align16 1Q };
+       *
+       * We used to generate code like this for SIMD8 executions, where
+       * components Y and W of a vector were packed at offset 16B of a SIMD
+       * register. The problem doesn't occur if the stride of the source is 0.
+       */
+      if (devinfo->gen == 8 &&
+          inst->opcode == BRW_OPCODE_MAD &&
+          inst->src[i].type == BRW_REGISTER_TYPE_HF &&
+          reg_offset(inst->src[i]) % REG_SIZE > 0 &&
+          inst->src[i].stride != 0) {
+         return true;
        }
+
+      const unsigned dst_byte_stride = inst->dst.stride * type_sz(inst->dst.type);
+      const unsigned src_byte_stride = inst->src[i].stride *
+         type_sz(inst->src[i].type);
+      const unsigned dst_byte_offset = reg_offset(inst->dst) % REG_SIZE;
+      const unsigned src_byte_offset = reg_offset(inst->src[i]) % REG_SIZE;
+
+      return has_dst_aligned_region_restriction(devinfo, inst) &&
+             !is_uniform(inst->src[i]) &&
+             (src_byte_stride != dst_byte_stride ||
+              src_byte_offset != dst_byte_offset);
     }
  
     /*
@@ -239,6 +256,12 @@ namespace brw {
     lower_src_modifiers(fs_visitor *v, bblock_t *block, fs_inst *inst, unsigned i)
     {
        assert(inst->components_read(i) == 1);
+      assert(v->devinfo->has_integer_dword_mul ||
+             inst->opcode != BRW_OPCODE_MUL ||
+             brw_reg_type_is_floating_point(get_exec_type(inst)) ||
+             MIN2(type_sz(inst->src[0].type), type_sz(inst->src[1].type)) >= 4 ||
+             type_sz(inst->src[i].type) == get_exec_type_size(inst));
+
        const fs_builder ibld(v, block, inst);
        const fs_reg tmp = ibld.vgrf(get_exec_type(inst));
  
@@ -271,7 +294,9 @@ namespace {
        const unsigned stride =
           type_sz(inst->dst.type) * inst->dst.stride <= type_sz(type) ? 1 :
           type_sz(inst->dst.type) * inst->dst.stride / type_sz(type);
-      const fs_reg tmp = horiz_stride(ibld.vgrf(type, stride), stride);
+      fs_reg tmp = ibld.vgrf(type, stride);
+      ibld.UNDEF(tmp);
+      tmp = horiz_stride(tmp, stride);
  
        /* Emit a MOV taking care of all the destination modifiers. */
        fs_inst *mov = ibld.at(block, inst->next).MOV(inst->dst, tmp);
@@ -312,8 +337,9 @@ namespace {
        const unsigned stride = type_sz(inst->dst.type) * inst->dst.stride /
                                type_sz(inst->src[i].type);
        assert(stride > 0);
-      const fs_reg tmp = horiz_stride(ibld.vgrf(inst->src[i].type, stride),
-                                      stride);
+      fs_reg tmp = ibld.vgrf(inst->src[i].type, stride);
+      ibld.UNDEF(tmp);
+      tmp = horiz_stride(tmp, stride);
  
        /* Emit a series of 32-bit integer copies with any source modifiers
         * cleaned up (because their semantics are dependent on the type).
@@ -360,8 +386,9 @@ namespace {
        const unsigned stride = required_dst_byte_stride(inst) /
                                type_sz(inst->dst.type);
        assert(stride > 0);
-      const fs_reg tmp = horiz_stride(ibld.vgrf(inst->dst.type, stride),
-                                      stride);
+      fs_reg tmp = ibld.vgrf(inst->dst.type, stride);
+      ibld.UNDEF(tmp);
+      tmp = horiz_stride(tmp, stride);
  
        /* Emit a series of 32-bit integer copies from the temporary into the
         * original destination.
@@ -433,7 +460,7 @@ fs_visitor::lower_regioning()
        progress |= lower_instruction(this, block, inst);
  
     if (progress)
-      invalidate_live_intervals();
+      invalidate_analysis(DEPENDENCY_INSTRUCTIONS | DEPENDENCY_VARIABLES);
  
     return progress;
  }