From: Francisco Jerez Date: Sat, 4 Jan 2020 23:48:07 +0000 (-0800) Subject: intel/fs/gen7+: Swap sample mask flag register and FIND_LIVE_CHANNEL temporary. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=a792e11f5ccb28f5d2430008d462c79888a077c3;p=mesa.git intel/fs/gen7+: Swap sample mask flag register and FIND_LIVE_CHANNEL temporary. FIND_LIVE_CHANNEL was using f1.0-f1.1 as its temporary flag register on Gen7; use f0.0-f0.1 instead. In order to avoid collision with the discard sample mask, move the latter to f1.0-f1.1. This makes room for keeping track of the sample mask of the second half of SIMD32 programs that use discard. Note that some MOVs of the sample mask into f1.0 become redundant now in lower_surface_logical_send() and lower_a64_logical_send(). Reviewed-by: Kenneth Graunke x --- diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index b5834540ef1..276eb70987b 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -5458,15 +5458,17 @@ lower_surface_logical_send(const fs_builder &bld, fs_inst *inst) * vertical predication mode. 
*/ inst->predicate = BRW_PREDICATE_ALIGN1_ALLV; - ubld.MOV(retype(brw_flag_subreg(inst->flag_subreg + 2), - sample_mask.type), - sample_mask); + if (sample_mask.file != ARF || sample_mask.nr != BRW_ARF_FLAG + 1) + ubld.MOV(retype(brw_flag_subreg(inst->flag_subreg + 2), + sample_mask.type), + sample_mask); } else { inst->flag_subreg = 2; inst->predicate = BRW_PREDICATE_NORMAL; inst->predicate_inverse = false; - ubld.MOV(retype(brw_flag_subreg(inst->flag_subreg), sample_mask.type), - sample_mask); + if (sample_mask.file != ARF || sample_mask.nr != BRW_ARF_FLAG + 1) + ubld.MOV(retype(brw_flag_subreg(inst->flag_subreg), sample_mask.type), + sample_mask); } } @@ -5646,8 +5648,9 @@ lower_a64_logical_send(const fs_builder &bld, fs_inst *inst) fs_reg sample_mask = sample_mask_reg(bld); const fs_builder ubld = bld.group(1, 0).exec_all(); - ubld.MOV(retype(brw_flag_subreg(inst->flag_subreg), sample_mask.type), - sample_mask); + if (sample_mask.file != ARF || sample_mask.nr != BRW_ARF_FLAG + 1) + ubld.MOV(retype(brw_flag_subreg(inst->flag_subreg), sample_mask.type), + sample_mask); } fs_reg payload, payload2; diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h index 93d3e460098..543e760b3fe 100644 --- a/src/intel/compiler/brw_fs.h +++ b/src/intel/compiler/brw_fs.h @@ -419,13 +419,15 @@ private: /** * Return the flag register used in fragment shaders to keep track of live - * samples. + * samples. On Gen7+ we use f1.0-f1.1 to allow discard jumps in SIMD32 + * dispatch mode, while earlier generations are constrained to f0.1, which + * limits the dispatch width to SIMD16 for fragment shaders that use discard. */ static inline unsigned sample_mask_flag_subreg(const fs_visitor *shader) { assert(shader->stage == MESA_SHADER_FRAGMENT); - return 1; + return shader->devinfo->gen >= 7 ? 
2 : 1; } /** diff --git a/src/intel/compiler/brw_fs_builder.h b/src/intel/compiler/brw_fs_builder.h index fac4f5c884c..896088cc5b8 100644 --- a/src/intel/compiler/brw_fs_builder.h +++ b/src/intel/compiler/brw_fs_builder.h @@ -406,7 +406,7 @@ namespace brw { const dst_reg chan_index = vgrf(BRW_REGISTER_TYPE_UD); const dst_reg dst = vgrf(src.type); - ubld.emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, chan_index)->flag_subreg = 2; + ubld.emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, chan_index); ubld.emit(SHADER_OPCODE_BROADCAST, dst, src, component(chan_index, 0)); return src_reg(component(dst, 0)); diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index 93c6ee24404..5d66ead4a24 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -3490,9 +3490,9 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld, case nir_intrinsic_discard: case nir_intrinsic_demote_if: case nir_intrinsic_discard_if: { - /* We track our discarded pixels in f0.1. By predicating on it, we can - * update just the flag bits that aren't yet discarded. If there's no - * condition, we emit a CMP of g0 != g0, so all currently executing + /* We track our discarded pixels in f0.1/f1.0. By predicating on it, we + * can update just the flag bits that aren't yet discarded. If there's + * no condition, we emit a CMP of g0 != g0, so all currently executing * channels will get turned off. */ fs_inst *cmp = NULL;