i965/fs: clamp exec_size when an instruction has a scalar DF source
author Samuel Iglesias Gonsálvez <siglesias@igalia.com>
Wed, 11 Jan 2017 07:17:57 +0000 (08:17 +0100)
committer Francisco Jerez <currojerez@riseup.net>
Fri, 14 Apr 2017 21:56:07 +0000 (14:56 -0700)
Then the SIMD lowering pass will get rid of any compressed instructions with a
scalar source (whether force_writemask_all or not), and so we avoid hitting the
Gen7 region decompression bug.

Signed-off-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com>
Suggested-by: Francisco Jerez <currojerez@riseup.net>
Reviewed-by: Francisco Jerez <currojerez@riseup.net>
src/intel/compiler/brw_fs.cpp

index 3fc7ae48943427bbf7e552552154aa2264ce17d5..086b1a0485513381ee287a7a40d78bed5302fa97 100644 (file)
@@ -4529,11 +4529,16 @@ get_fpu_lowered_simd_width(const struct gen_device_info *devinfo,
     */
    if (devinfo->gen < 8) {
       for (unsigned i = 0; i < inst->sources; i++) {
+         /* IVB implements DF scalars as <0;2,1> regions. */
+         const bool is_scalar_exception = is_uniform(inst->src[i]) &&
+            (devinfo->is_haswell || type_sz(inst->src[i].type) != 8);
+         const bool is_packed_word_exception =
+            type_sz(inst->dst.type) == 4 && inst->dst.stride == 1 &&
+            type_sz(inst->src[i].type) == 2 && inst->src[i].stride == 1;
+
          if (inst->size_written > REG_SIZE &&
              inst->size_read(i) != 0 && inst->size_read(i) <= REG_SIZE &&
-             !is_uniform(inst->src[i]) &&
-             !(type_sz(inst->dst.type) == 4 && inst->dst.stride == 1 &&
-               type_sz(inst->src[i].type) == 2 && inst->src[i].stride == 1)) {
+             !is_scalar_exception && !is_packed_word_exception) {
             const unsigned reg_count = DIV_ROUND_UP(inst->size_written, REG_SIZE);
             max_width = MIN2(max_width, inst->exec_size / reg_count);
          }