return slm_size;
}
+/**
+ * Return true if the given shader stage is dispatched contiguously by the
+ * relevant fixed function starting from channel 0 of the SIMD thread, which
+ * implies that the dispatch mask of a thread can be assumed to have the form
+ * '2^n - 1' for some n.
+ */
+static inline bool
+brw_stage_has_packed_dispatch(const struct gen_device_info *devinfo,
+                              gl_shader_stage stage,
+                              const struct brw_stage_prog_data *prog_data)
+{
+   /* The code below makes assumptions about the hardware's thread dispatch
+    * behavior that could be proven wrong in future generations -- Make sure
+    * to do a full test run with brw_fs_test_dispatch_packing() hooked up to
+    * the NIR front-end before changing this assertion.
+    */
+   assert(devinfo->gen <= 9);
+
+   switch (stage) {
+   case MESA_SHADER_FRAGMENT: {
+      /* The PSD discards subspans coming in with no lit samples, which in the
+       * per-pixel shading case implies that each subspan will either be fully
+       * lit (due to the VMask being used to allow derivative computations),
+       * or not dispatched at all. In per-sample dispatch mode individual
+       * samples from the same subspan have a fixed relative location within
+       * the SIMD thread, so dispatch of unlit samples cannot be avoided in
+       * general and we should return false.
+       */
+      const struct brw_wm_prog_data *wm_prog_data =
+         (const struct brw_wm_prog_data *)prog_data;
+      return !wm_prog_data->persample_dispatch;
+   }
+   case MESA_SHADER_COMPUTE:
+      /* Compute shaders will be spawned with either a fully enabled dispatch
+       * mask or with whatever bottom/right execution mask was given to the
+       * GPGPU walker command to be used along the workgroup edges -- In both
+       * cases the dispatch mask is required to be tightly packed for our
+       * invocation index calculations to work.
+       */
+      return true;
+   default:
+      /* Most remaining fixed functions are limited to use a packed dispatch
+       * mask due to the hardware representation of the dispatch mask as a
+       * single counter representing the number of enabled channels.
+       */
+      return true;
+   }
+}
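+
+/* A minimal illustrative sketch of the "packed" property documented above,
+ * assuming nothing beyond the 2^n - 1 definition: such a mask is a contiguous
+ * run of enabled channels that always includes channel 0. The helper name
+ * below is hypothetical, is not part of this change and is not used by the
+ * driver; it only shows the bit trick that characterizes packed masks.
+ */
+static inline bool
+example_dispatch_mask_is_packed(uint32_t mask)
+{
+   /* For masks of the form 2^n - 1 (including 0), adding one carries through
+    * every set bit, so ANDing the mask with its successor yields zero exactly
+    * for packed masks.
+    */
+   return (mask & (mask + 1)) == 0;
+}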
+
#ifdef __cplusplus
} /* extern "C" */
#endif
bool progress = false;
unsigned depth = 0;
+   if (!brw_stage_has_packed_dispatch(devinfo, stage, stage_prog_data)) {
+      /* The optimization below assumes that channel zero is live on thread
+       * dispatch, which may not be the case if the fixed function dispatches
+       * threads sparsely.
+       */
+      return false;
+   }
+
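+   /* A concrete illustration of the guarantee relied on above (mask values
+    * are hypothetical): a packed SIMD8 dispatch mask such as 0b00011111
+    * always has channel 0 enabled, whereas a sparse mask such as 0b00010100
+    * would leave channel 0 dead and invalidate the optimization.
+    */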
foreach_block_and_inst_safe(block, fs_inst, inst, cfg) {
switch (inst->opcode) {
case BRW_OPCODE_IF:
bool progress = false;
unsigned depth = 0;
+   if (!brw_stage_has_packed_dispatch(devinfo, stage, stage_prog_data)) {
+      /* The optimization below assumes that channel zero is live on thread
+       * dispatch, which may not be the case if the fixed function dispatches
+       * threads sparsely.
+       */
+      return false;
+   }
+
foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) {
switch (inst->opcode) {
case BRW_OPCODE_IF: