intel/fs/gen7+: Swap sample mask flag register and FIND_LIVE_CHANNEL temporary.
author Francisco Jerez <currojerez@riseup.net>
Sat, 4 Jan 2020 23:48:07 +0000 (15:48 -0800)
committer Francisco Jerez <currojerez@riseup.net>
Fri, 14 Feb 2020 22:31:48 +0000 (14:31 -0800)
FIND_LIVE_CHANNEL was using f1.0-f1.1 as a temporary flag register on
Gen7; instead use f0.0-f0.1.  In order to avoid collision with the
discard sample mask, move the latter to f1.0-f1.1.  This makes room
for keeping track of the sample mask of the second half of SIMD32
programs that use discard.

Note that some MOVs of the sample mask into f1.0 become redundant now
in lower_surface_logical_send() and lower_a64_logical_send().

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
src/intel/compiler/brw_fs.cpp
src/intel/compiler/brw_fs.h
src/intel/compiler/brw_fs_builder.h
src/intel/compiler/brw_fs_nir.cpp

index b5834540ef11bbdc356b90bde6965a4d3f765c81..276eb70987b80f04c2781799e1ef38f7bbfcd561 100644 (file)
@@ -5458,15 +5458,17 @@ lower_surface_logical_send(const fs_builder &bld, fs_inst *inst)
           * vertical predication mode.
           */
          inst->predicate = BRW_PREDICATE_ALIGN1_ALLV;
-         ubld.MOV(retype(brw_flag_subreg(inst->flag_subreg + 2),
-                         sample_mask.type),
-                  sample_mask);
+         if (sample_mask.file != ARF || sample_mask.nr != BRW_ARF_FLAG + 1)
+            ubld.MOV(retype(brw_flag_subreg(inst->flag_subreg + 2),
+                            sample_mask.type),
+                     sample_mask);
       } else {
          inst->flag_subreg = 2;
          inst->predicate = BRW_PREDICATE_NORMAL;
          inst->predicate_inverse = false;
-         ubld.MOV(retype(brw_flag_subreg(inst->flag_subreg), sample_mask.type),
-                  sample_mask);
+         if (sample_mask.file != ARF || sample_mask.nr != BRW_ARF_FLAG + 1)
+            ubld.MOV(retype(brw_flag_subreg(inst->flag_subreg), sample_mask.type),
+                     sample_mask);
       }
    }
 
@@ -5646,8 +5648,9 @@ lower_a64_logical_send(const fs_builder &bld, fs_inst *inst)
 
       fs_reg sample_mask = sample_mask_reg(bld);
       const fs_builder ubld = bld.group(1, 0).exec_all();
-      ubld.MOV(retype(brw_flag_subreg(inst->flag_subreg), sample_mask.type),
-               sample_mask);
+      if (sample_mask.file != ARF || sample_mask.nr != BRW_ARF_FLAG + 1)
+         ubld.MOV(retype(brw_flag_subreg(inst->flag_subreg), sample_mask.type),
+                  sample_mask);
    }
 
    fs_reg payload, payload2;
index 93d3e4600987d7aed2ce0ebe52abfd14adb31035..543e760b3fe894d501dd53190b296b9d544babc1 100644 (file)
@@ -419,13 +419,15 @@ private:
 
 /**
  * Return the flag register used in fragment shaders to keep track of live
- * samples.
+ * samples.  On Gen7+ we use f1.0-f1.1 to allow discard jumps in SIMD32
+ * dispatch mode, while earlier generations are constrained to f0.1, which
+ * limits the dispatch width to SIMD16 for fragment shaders that use discard.
  */
 static inline unsigned
 sample_mask_flag_subreg(const fs_visitor *shader)
 {
    assert(shader->stage == MESA_SHADER_FRAGMENT);
-   return 1;
+   return shader->devinfo->gen >= 7 ? 2 : 1;
 }
 
 /**
index fac4f5c884c03b06ac72e178e3539c4b1211eb15..896088cc5b8b4658d529715e718126666d457d20 100644 (file)
@@ -406,7 +406,7 @@ namespace brw {
          const dst_reg chan_index = vgrf(BRW_REGISTER_TYPE_UD);
          const dst_reg dst = vgrf(src.type);
 
-         ubld.emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, chan_index)->flag_subreg = 2;
+         ubld.emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, chan_index);
          ubld.emit(SHADER_OPCODE_BROADCAST, dst, src, component(chan_index, 0));
 
          return src_reg(component(dst, 0));
index 93c6ee24404d37a1b796721272efedaabc9263d3..5d66ead4a2413d5e8ead088df2792d293441da9b 100644 (file)
@@ -3490,9 +3490,9 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld,
    case nir_intrinsic_discard:
    case nir_intrinsic_demote_if:
    case nir_intrinsic_discard_if: {
-      /* We track our discarded pixels in f0.1.  By predicating on it, we can
-       * update just the flag bits that aren't yet discarded.  If there's no
-       * condition, we emit a CMP of g0 != g0, so all currently executing
+      /* We track our discarded pixels in f0.1/f1.0.  By predicating on it, we
+       * can update just the flag bits that aren't yet discarded.  If there's
+       * no condition, we emit a CMP of g0 != g0, so all currently executing
        * channels will get turned off.
        */
       fs_inst *cmp = NULL;