if (cluster_size > 1) {
const fs_builder ubld = exec_all().group(dispatch_width() / 2, 0);
- dst_reg left = horiz_stride(tmp, 2);
- dst_reg right = horiz_stride(horiz_offset(tmp, 1), 2);
-
- /* From the Cherryview PRM Vol. 7, "Register Region Restrictiosn":
- *
- * "When source or destination datatype is 64b or operation is
- * integer DWord multiply, regioning in Align1 must follow
- * these rules:
- *
- * [...]
- *
- * 3. Source and Destination offset must be the same, except
- * the case of scalar source."
- *
- * In order to work around this, we create a temporary register
- * and shift left over to match right. If we have a 64-bit type,
- * we have to use two integer MOVs instead of a 64-bit MOV.
- */
- if (need_matching_subreg_offset(opcode, tmp.type)) {
- dst_reg tmp2 = vgrf(tmp.type);
- dst_reg new_left = horiz_stride(horiz_offset(tmp2, 1), 2);
- if (type_sz(tmp.type) > 4) {
- ubld.MOV(subscript(new_left, BRW_REGISTER_TYPE_D, 0),
- subscript(left, BRW_REGISTER_TYPE_D, 0));
- ubld.MOV(subscript(new_left, BRW_REGISTER_TYPE_D, 1),
- subscript(left, BRW_REGISTER_TYPE_D, 1));
- } else {
- ubld.MOV(new_left, left);
- }
- left = new_left;
- }
+ const dst_reg left = horiz_stride(tmp, 2);
+ const dst_reg right = horiz_stride(horiz_offset(tmp, 1), 2);
set_condmod(mod, ubld.emit(opcode, right, left, right));
}
if (cluster_size > 2) {
- if (type_sz(tmp.type) <= 4 &&
- !need_matching_subreg_offset(opcode, tmp.type)) {
+ if (type_sz(tmp.type) <= 4) {
const fs_builder ubld =
exec_all().group(dispatch_width() / 4, 0);
src_reg left = horiz_stride(horiz_offset(tmp, 1), 4);
}
}
-
- /* From the Cherryview PRM Vol. 7, "Register Region Restrictiosn":
- *
- * "When source or destination datatype is 64b or operation is
- * integer DWord multiply, regioning in Align1 must follow
- * these rules:
- *
- * [...]
- *
- * 3. Source and Destination offset must be the same, except
- * the case of scalar source."
- *
- * This helper just detects when we're in this case.
- */
- bool
- need_matching_subreg_offset(enum opcode opcode,
- enum brw_reg_type type) const
- {
- if (!shader->devinfo->is_cherryview &&
- !gen_device_info_is_9lp(shader->devinfo))
- return false;
-
- if (type_sz(type) > 4)
- return true;
-
- if (opcode == BRW_OPCODE_MUL &&
- !brw_reg_type_is_floating_point(type))
- return true;
-
- return false;
- }
-
bblock_t *block;
exec_node *cursor;
case nir_op_i2i64:
case nir_op_u2f64:
case nir_op_u2u64:
- /* CHV PRM, vol07, 3D Media GPGPU Engine, Register Region Restrictions:
- *
- * "When source or destination is 64b (...), regioning in Align1
- * must follow these rules:
- *
- * 1. Source and destination horizontal stride must be aligned to
- * the same qword.
- * (...)"
- *
- * This means that conversions from bit-sizes smaller than 64-bit to
- * 64-bit need to have the source data elements aligned to 64-bit.
- * This restriction does not apply to BDW and later.
- */
- if (nir_dest_bit_size(instr->dest.dest) == 64 &&
- nir_src_bit_size(instr->src[0].src) < 64 &&
- (devinfo->is_cherryview || gen_device_info_is_9lp(devinfo))) {
- fs_reg tmp = bld.vgrf(result.type, 1);
- tmp = subscript(tmp, op[0].type, 0);
- inst = bld.MOV(tmp, op[0]);
- inst = bld.MOV(result, tmp);
- inst->saturate = instr->dest.saturate;
- break;
- }
- /* fallthrough */
case nir_op_f2f32:
case nir_op_f2i32:
case nir_op_f2u32:
unreachable("not reached: should have been lowered");
case nir_op_ishl:
+ bld.SHL(result, op[0], op[1]);
+ break;
case nir_op_ishr:
- case nir_op_ushr: {
- fs_reg shift_count = op[1];
-
- if (devinfo->is_cherryview || gen_device_info_is_9lp(devinfo)) {
- if (op[1].file == VGRF &&
- (result.type == BRW_REGISTER_TYPE_Q ||
- result.type == BRW_REGISTER_TYPE_UQ)) {
- shift_count = fs_reg(VGRF, alloc.allocate(dispatch_width / 4),
- BRW_REGISTER_TYPE_UD);
- shift_count.stride = 2;
- bld.MOV(shift_count, op[1]);
- }
- }
-
- switch (instr->op) {
- case nir_op_ishl:
- bld.SHL(result, op[0], shift_count);
- break;
- case nir_op_ishr:
- bld.ASR(result, op[0], shift_count);
- break;
- case nir_op_ushr:
- bld.SHR(result, op[0], shift_count);
- break;
- default:
- unreachable("not reached");
- }
+ bld.ASR(result, op[0], op[1]);
+ break;
+ case nir_op_ushr:
+ bld.SHR(result, op[0], op[1]);
break;
- }
case nir_op_pack_half_2x16_split:
bld.emit(FS_OPCODE_PACK_HALF_2x16_SPLIT, result, op[0], op[1]);
const fs_reg tmp_left = horiz_stride(tmp, 2);
const fs_reg tmp_right = horiz_stride(horiz_offset(tmp, 1), 2);
- /* From the Cherryview PRM Vol. 7, "Register Region Restrictiosn":
- *
- * "When source or destination datatype is 64b or operation is
- * integer DWord multiply, regioning in Align1 must follow
- * these rules:
- *
- * [...]
- *
- * 3. Source and Destination offset must be the same, except
- * the case of scalar source."
- *
- * In order to work around this, we have to emit two 32-bit MOVs instead
- * of a single 64-bit MOV to do the shuffle.
- */
- if (type_sz(value.type) > 4 &&
- (devinfo->is_cherryview || gen_device_info_is_9lp(devinfo))) {
- ubld.MOV(subscript(tmp_left, BRW_REGISTER_TYPE_D, 0),
- subscript(src_right, BRW_REGISTER_TYPE_D, 0));
- ubld.MOV(subscript(tmp_left, BRW_REGISTER_TYPE_D, 1),
- subscript(src_right, BRW_REGISTER_TYPE_D, 1));
- ubld.MOV(subscript(tmp_right, BRW_REGISTER_TYPE_D, 0),
- subscript(src_left, BRW_REGISTER_TYPE_D, 0));
- ubld.MOV(subscript(tmp_right, BRW_REGISTER_TYPE_D, 1),
- subscript(src_left, BRW_REGISTER_TYPE_D, 1));
- } else {
- ubld.MOV(tmp_left, src_right);
- ubld.MOV(tmp_right, src_left);
- }
+ ubld.MOV(tmp_left, src_right);
+ ubld.MOV(tmp_right, src_left);
+
bld.MOV(retype(dest, value.type), tmp);
break;
}