* dead channels from affecting the result, we initialize the flag with
* the identity value for the logical operation.
*/
- ubld.MOV(brw_flag_reg(0, 0), brw_imm_uw(0));
+ if (dispatch_width == 32) {
+ /* For SIMD32, we use a UD type so we fill both f0.0 and f0.1. */
+ ubld.MOV(retype(brw_flag_reg(0, 0), BRW_REGISTER_TYPE_UD),
+ brw_imm_ud(0));
+ } else {
+ ubld.MOV(brw_flag_reg(0, 0), brw_imm_uw(0));
+ }
bld.CMP(bld.null_reg_d(), get_nir_src(instr->src[0]), brw_imm_d(0), BRW_CONDITIONAL_NZ);
bld.MOV(dest, brw_imm_d(-1));
- set_predicate(dispatch_width == 8 ?
- BRW_PREDICATE_ALIGN1_ANY8H :
- BRW_PREDICATE_ALIGN1_ANY16H,
+ set_predicate(dispatch_width == 8 ? BRW_PREDICATE_ALIGN1_ANY8H :
+ dispatch_width == 16 ? BRW_PREDICATE_ALIGN1_ANY16H :
+ BRW_PREDICATE_ALIGN1_ANY32H,
bld.SEL(dest, dest, brw_imm_d(0)));
break;
}
* dead channels from affecting the result, we initialize the flag with
* the identity value for the logical operation.
*/
- ubld.MOV(brw_flag_reg(0, 0), brw_imm_uw(0xffff));
+ if (dispatch_width == 32) {
+ /* For SIMD32, we use a UD type so we fill both f0.0 and f0.1. */
+ ubld.MOV(retype(brw_flag_reg(0, 0), BRW_REGISTER_TYPE_UD),
+ brw_imm_ud(0xffffffff));
+ } else {
+ ubld.MOV(brw_flag_reg(0, 0), brw_imm_uw(0xffff));
+ }
bld.CMP(bld.null_reg_d(), get_nir_src(instr->src[0]), brw_imm_d(0), BRW_CONDITIONAL_NZ);
bld.MOV(dest, brw_imm_d(-1));
- set_predicate(dispatch_width == 8 ?
- BRW_PREDICATE_ALIGN1_ALL8H :
- BRW_PREDICATE_ALIGN1_ALL16H,
+ set_predicate(dispatch_width == 8 ? BRW_PREDICATE_ALIGN1_ALL8H :
+ dispatch_width == 16 ? BRW_PREDICATE_ALIGN1_ALL16H :
+ BRW_PREDICATE_ALIGN1_ALL32H,
bld.SEL(dest, dest, brw_imm_d(0)));
break;
}
* dead channels from affecting the result, we initialize the flag with
* the identity value for the logical operation.
*/
- ubld.MOV(brw_flag_reg(0, 0), brw_imm_uw(0xffff));
+ if (dispatch_width == 32) {
+ /* For SIMD32, we use a UD type so we fill both f0.0 and f0.1. */
+ ubld.MOV(retype(brw_flag_reg(0, 0), BRW_REGISTER_TYPE_UD),
+ brw_imm_ud(0xffffffff));
+ } else {
+ ubld.MOV(brw_flag_reg(0, 0), brw_imm_uw(0xffff));
+ }
bld.CMP(bld.null_reg_d(), value, uniformized, BRW_CONDITIONAL_Z);
bld.MOV(dest, brw_imm_d(-1));
- set_predicate(dispatch_width == 8 ?
- BRW_PREDICATE_ALIGN1_ALL8H :
- BRW_PREDICATE_ALIGN1_ALL16H,
+ set_predicate(dispatch_width == 8 ? BRW_PREDICATE_ALIGN1_ALL8H :
+ dispatch_width == 16 ? BRW_PREDICATE_ALIGN1_ALL16H :
+ BRW_PREDICATE_ALIGN1_ALL32H,
bld.SEL(dest, dest, brw_imm_d(0)));
break;
}
case nir_intrinsic_ballot: {
const fs_reg value = retype(get_nir_src(instr->src[0]),
BRW_REGISTER_TYPE_UD);
- const struct brw_reg flag = retype(brw_flag_reg(0, 0),
- BRW_REGISTER_TYPE_UD);
+ struct brw_reg flag = brw_flag_reg(0, 0);
+ /* FIXME: For SIMD32 programs, this causes us to stomp on f0.1 as well
+ * as f0.0. This is a problem for fragment programs as we currently use
+ * f0.1 for discards. Fortunately, we don't support SIMD32 fragment
+ * programs yet so this isn't a problem. When we do, something will
+ * have to change.
+ */
+ if (dispatch_width == 32)
+ flag.type = BRW_REGISTER_TYPE_UD;
bld.exec_all().MOV(flag, brw_imm_ud(0u));
bld.CMP(bld.null_reg_ud(), value, brw_imm_ud(0u), BRW_CONDITIONAL_NZ);