return slm_size;
}
+/**
+ * Return true if the given shader stage is dispatched contiguously by the
+ * relevant fixed function starting from channel 0 of the SIMD thread, which
+ * implies that the dispatch mask of a thread can be assumed to have the form
+ * '2^n - 1' for some n.
+ */
+static inline bool
+brw_stage_has_packed_dispatch(const struct gen_device_info *devinfo,
+                              gl_shader_stage stage,
+                              const struct brw_stage_prog_data *prog_data)
+{
+   /* The code below makes assumptions about the hardware's thread dispatch
+    * behavior that could be proven wrong in future generations -- Make sure
+    * to do a full test run with brw_fs_test_dispatch_packing() hooked up to
+    * the NIR front-end before changing this assertion.
+    */
+   assert(devinfo->gen <= 9);
+
+   switch (stage) {
+   case MESA_SHADER_FRAGMENT: {
+      /* The PSD discards subspans coming in with no lit samples, which in the
+       * per-pixel shading case implies that each subspan will either be fully
+       * lit (due to the VMask being used to allow derivative computations),
+       * or not dispatched at all. In per-sample dispatch mode individual
+       * samples from the same subspan have a fixed relative location within
+       * the SIMD thread, so dispatch of unlit samples cannot be avoided in
+       * general and we should return false.
+       */
+      const struct brw_wm_prog_data *wm_prog_data =
+         (const struct brw_wm_prog_data *)prog_data;
+      return !wm_prog_data->persample_dispatch;
+   }
+   case MESA_SHADER_COMPUTE:
+      /* Compute shaders will be spawned with either a fully enabled dispatch
+       * mask or with whatever bottom/right execution mask was given to the
+       * GPGPU walker command to be used along the workgroup edges -- In both
+       * cases the dispatch mask is required to be tightly packed for our
+       * invocation index calculations to work.
+       */
+      return true;
+   default:
+      /* Most remaining fixed functions are limited to use a packed dispatch
+       * mask due to the hardware representation of the dispatch mask as a
+       * single counter representing the number of enabled channels.
+       */
+      return true;
+   }
+}
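+
+/* A minimal illustrative sketch of the "packed" property documented above,
+ * assuming nothing beyond the 2^n - 1 definition: such a mask is a contiguous
+ * run of enabled channels that always includes channel 0. The helper name
+ * below is hypothetical, is not part of this change and is not used by the
+ * driver; it only shows the bit trick that characterizes packed masks.
+ */
+static inline bool
+example_dispatch_mask_is_packed(uint32_t mask)
+{
+   /* For masks of the form 2^n - 1 (including 0), adding one carries through
+    * every set bit, so ANDing the mask with its successor yields zero exactly
+    * for packed masks.
+    */
+   return (mask & (mask + 1)) == 0;
+}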
+
#ifdef __cplusplus
} /* extern "C" */
#endif
bool progress = false;
unsigned depth = 0;
+   if (!brw_stage_has_packed_dispatch(devinfo, stage, stage_prog_data)) {
+      /* The optimization below assumes that channel zero is live on thread
+       * dispatch, which may not be the case if the fixed function dispatches
+       * threads sparsely.
+       */
+      return false;
+   }
+
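+   /* A concrete illustration of the guarantee relied on above (mask values
+    * are hypothetical): a packed SIMD8 dispatch mask such as 0b00011111
+    * always has channel 0 enabled, whereas a sparse mask such as 0b00010100
+    * would leave channel 0 dead and invalidate the optimization.
+    */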
foreach_block_and_inst_safe(block, fs_inst, inst, cfg) {
switch (inst->opcode) {
case BRW_OPCODE_IF:
bool progress = false;
unsigned depth = 0;
+   if (!brw_stage_has_packed_dispatch(devinfo, stage, stage_prog_data)) {
+      /* The optimization below assumes that channel zero is live on thread
+       * dispatch, which may not be the case if the fixed function dispatches
+       * threads sparsely.
+       */
+      return false;
+   }
+
foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) {
switch (inst->opcode) {
case BRW_OPCODE_IF: