intel/fs: Fix Gen7 compressed source region alignment restriction for SIMD32
authorFrancisco Jerez <currojerez@riseup.net>
Sat, 14 Jan 2017 01:04:23 +0000 (17:04 -0800)
committerJason Ekstrand <jason.ekstrand@intel.com>
Thu, 28 Jun 2018 20:19:38 +0000 (13:19 -0700)
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Reviewed-by: Matt Turner <mattst88@gmail.com>
src/intel/compiler/brw_fs.cpp

index 7c76c98bb0ed22f0146efd0a452c4738cb48da02..a564dd6ed15e8901ea6e299c67c12834f2cc4fc5 100644 (file)
@@ -5076,8 +5076,14 @@ get_fpu_lowered_simd_width(const struct gen_device_info *devinfo,
             type_sz(inst->dst.type) == 4 && inst->dst.stride == 1 &&
             type_sz(inst->src[i].type) == 2 && inst->src[i].stride == 1;
 
+         /* We check size_read(i) against size_written instead of REG_SIZE
+          * because we want to properly handle SIMD32.  In SIMD32, you can end
+          * up with writes to 4 registers and a source that reads 2 registers
+          * and we may still need to lower all the way to SIMD8 in that case.
+          */
          if (inst->size_written > REG_SIZE &&
-             inst->size_read(i) != 0 && inst->size_read(i) <= REG_SIZE &&
+             inst->size_read(i) != 0 &&
+             inst->size_read(i) < inst->size_written &&
              !is_scalar_exception && !is_packed_word_exception) {
             const unsigned reg_count = DIV_ROUND_UP(inst->size_written, REG_SIZE);
             max_width = MIN2(max_width, inst->exec_size / reg_count);