if (dest.file == BRW_MESSAGE_REGISTER_FILE)
assert((dest.nr & ~BRW_MRF_COMPR4) < BRW_MAX_MRF(devinfo->gen));
- else if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE)
+ else if (dest.file == BRW_GENERAL_REGISTER_FILE)
assert(dest.nr < 128);
- gen7_convert_mrf_to_grf(p, &dest);
+ /* The hardware has a restriction where if the destination is Byte,
+ * the instruction needs to have a stride of 2 (except for packed byte
+ * MOV). This seems to be required even if the destination is the NULL
+ * register.
+ */
+ if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
+ dest.nr == BRW_ARF_NULL &&
+ type_sz(dest.type) == 1) {
+ dest.hstride = BRW_HORIZONTAL_STRIDE_2;
+ }
- brw_inst_set_dst_file_type(devinfo, inst, dest.file, dest.type);
- brw_inst_set_dst_address_mode(devinfo, inst, dest.address_mode);
+ gen7_convert_mrf_to_grf(p, &dest);
- if (dest.address_mode == BRW_ADDRESS_DIRECT) {
+ if (devinfo->gen >= 12 &&
+ (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND ||
+ brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDC)) {
+ assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
+ dest.file == BRW_ARCHITECTURE_REGISTER_FILE);
+ assert(dest.address_mode == BRW_ADDRESS_DIRECT);
+ assert(dest.subnr == 0);
+ assert(brw_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1 ||
+ (dest.hstride == BRW_HORIZONTAL_STRIDE_1 &&
+ dest.vstride == dest.width + 1));
+ assert(!dest.negate && !dest.abs);
+ brw_inst_set_dst_reg_file(devinfo, inst, dest.file);
brw_inst_set_dst_da_reg_nr(devinfo, inst, dest.nr);
- if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
- brw_inst_set_dst_da1_subreg_nr(devinfo, inst, dest.subnr);
- if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
- dest.hstride = BRW_HORIZONTAL_STRIDE_1;
- brw_inst_set_dst_hstride(devinfo, inst, dest.hstride);
- } else {
- brw_inst_set_dst_da16_subreg_nr(devinfo, inst, dest.subnr / 16);
- brw_inst_set_da16_writemask(devinfo, inst, dest.writemask);
- if (dest.file == BRW_GENERAL_REGISTER_FILE ||
- dest.file == BRW_MESSAGE_REGISTER_FILE) {
- assert(dest.writemask != 0);
- }
- /* From the Ivybridge PRM, Vol 4, Part 3, Section 5.2.4.1:
- * Although Dst.HorzStride is a don't care for Align16, HW needs
- * this to be programmed as "01".
- */
- brw_inst_set_dst_hstride(devinfo, inst, 1);
- }
+ } else if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDS ||
+ brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDSC) {
+ assert(devinfo->gen < 12);
+ assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
+ dest.file == BRW_ARCHITECTURE_REGISTER_FILE);
+ assert(dest.address_mode == BRW_ADDRESS_DIRECT);
+ assert(dest.subnr % 16 == 0);
+ assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1 &&
+ dest.vstride == dest.width + 1);
+ assert(!dest.negate && !dest.abs);
+ brw_inst_set_dst_da_reg_nr(devinfo, inst, dest.nr);
+ brw_inst_set_dst_da16_subreg_nr(devinfo, inst, dest.subnr / 16);
+ brw_inst_set_send_dst_reg_file(devinfo, inst, dest.file);
} else {
- brw_inst_set_dst_ia_subreg_nr(devinfo, inst, dest.subnr);
+ brw_inst_set_dst_file_type(devinfo, inst, dest.file, dest.type);
+ brw_inst_set_dst_address_mode(devinfo, inst, dest.address_mode);
- /* These are different sizes in align1 vs align16:
- */
- if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
- brw_inst_set_dst_ia1_addr_imm(devinfo, inst,
- dest.indirect_offset);
- if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
- dest.hstride = BRW_HORIZONTAL_STRIDE_1;
- brw_inst_set_dst_hstride(devinfo, inst, dest.hstride);
+ if (dest.address_mode == BRW_ADDRESS_DIRECT) {
+ brw_inst_set_dst_da_reg_nr(devinfo, inst, dest.nr);
+
+ if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
+ brw_inst_set_dst_da1_subreg_nr(devinfo, inst, dest.subnr);
+ if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
+ dest.hstride = BRW_HORIZONTAL_STRIDE_1;
+ brw_inst_set_dst_hstride(devinfo, inst, dest.hstride);
+ } else {
+ brw_inst_set_dst_da16_subreg_nr(devinfo, inst, dest.subnr / 16);
+ brw_inst_set_da16_writemask(devinfo, inst, dest.writemask);
+ if (dest.file == BRW_GENERAL_REGISTER_FILE ||
+ dest.file == BRW_MESSAGE_REGISTER_FILE) {
+ assert(dest.writemask != 0);
+ }
+ /* From the Ivybridge PRM, Vol 4, Part 3, Section 5.2.4.1:
+ * Although Dst.HorzStride is a don't care for Align16, HW needs
+ * this to be programmed as "01".
+ */
+ brw_inst_set_dst_hstride(devinfo, inst, 1);
+ }
} else {
- brw_inst_set_dst_ia16_addr_imm(devinfo, inst,
- dest.indirect_offset);
- /* even ignored in da16, still need to set as '01' */
- brw_inst_set_dst_hstride(devinfo, inst, 1);
+ brw_inst_set_dst_ia_subreg_nr(devinfo, inst, dest.subnr);
+
+ /* These are different sizes in align1 vs align16:
+ */
+ if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
+ brw_inst_set_dst_ia1_addr_imm(devinfo, inst,
+ dest.indirect_offset);
+ if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
+ dest.hstride = BRW_HORIZONTAL_STRIDE_1;
+ brw_inst_set_dst_hstride(devinfo, inst, dest.hstride);
+ } else {
+ brw_inst_set_dst_ia16_addr_imm(devinfo, inst,
+ dest.indirect_offset);
+ /* even ignored in da16, still need to set as '01' */
+ brw_inst_set_dst_hstride(devinfo, inst, 1);
+ }
}
}
/* Generators should set a default exec_size of either 8 (SIMD4x2 or SIMD8)
* or 16 (SIMD16), as that's normally correct. However, when dealing with
- * small registers, we automatically reduce it to match the register size.
- *
- * In platforms that support fp64 we can emit instructions with a width of
- * 4 that need two SIMD8 registers and an exec_size of 8 or 16. In these
- * cases we need to make sure that these instructions have their exec sizes
- * set properly when they are emitted and we can't rely on this code to fix
- * it.
+ * small registers, it can be useful for us to automatically reduce it to
+ * match the register size.
*/
- bool fix_exec_size;
- if (devinfo->gen >= 6)
- fix_exec_size = dest.width < BRW_EXECUTE_4;
- else
- fix_exec_size = dest.width < BRW_EXECUTE_8;
+ if (p->automatic_exec_sizes) {
+ /*
+ * In platforms that support fp64 we can emit instructions with a width
+ * of 4 that need two SIMD8 registers and an exec_size of 8 or 16. In
+ * these cases we need to make sure that these instructions have their
+ * exec sizes set properly when they are emitted and we can't rely on
+ * this code to fix it.
+ */
+ bool fix_exec_size;
+ if (devinfo->gen >= 6)
+ fix_exec_size = dest.width < BRW_EXECUTE_4;
+ else
+ fix_exec_size = dest.width < BRW_EXECUTE_8;
- if (fix_exec_size)
- brw_inst_set_exec_size(devinfo, inst, dest.width);
+ if (fix_exec_size)
+ brw_inst_set_exec_size(devinfo, inst, dest.width);
+ }
}
void
if (reg.file == BRW_MESSAGE_REGISTER_FILE)
assert((reg.nr & ~BRW_MRF_COMPR4) < BRW_MAX_MRF(devinfo->gen));
- else if (reg.file != BRW_ARCHITECTURE_REGISTER_FILE)
+ else if (reg.file == BRW_GENERAL_REGISTER_FILE)
assert(reg.nr < 128);
gen7_convert_mrf_to_grf(p, ®);
- if (devinfo->gen >= 6 && (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND ||
- brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDC)) {
+ if (devinfo->gen >= 6 &&
+ (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND ||
+ brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDC ||
+ brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDS ||
+ brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDSC)) {
/* Any source modifiers or regions will be ignored, since this just
* identifies the MRF/GRF to start reading the message contents from.
* Check for some likely failures.
assert(reg.address_mode == BRW_ADDRESS_DIRECT);
}
- brw_inst_set_src0_file_type(devinfo, inst, reg.file, reg.type);
- brw_inst_set_src0_abs(devinfo, inst, reg.abs);
- brw_inst_set_src0_negate(devinfo, inst, reg.negate);
- brw_inst_set_src0_address_mode(devinfo, inst, reg.address_mode);
-
- if (reg.file == BRW_IMMEDIATE_VALUE) {
- if (reg.type == BRW_REGISTER_TYPE_DF ||
- brw_inst_opcode(devinfo, inst) == BRW_OPCODE_DIM)
- brw_inst_set_imm_df(devinfo, inst, reg.df);
- else if (reg.type == BRW_REGISTER_TYPE_UQ ||
- reg.type == BRW_REGISTER_TYPE_Q)
- brw_inst_set_imm_uq(devinfo, inst, reg.u64);
- else
- brw_inst_set_imm_ud(devinfo, inst, reg.ud);
-
- if (type_sz(reg.type) < 8) {
- brw_inst_set_src1_reg_file(devinfo, inst,
- BRW_ARCHITECTURE_REGISTER_FILE);
- brw_inst_set_src1_reg_hw_type(devinfo, inst,
- brw_inst_src0_reg_hw_type(devinfo, inst));
- }
+ if (devinfo->gen >= 12 &&
+ (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND ||
+ brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDC)) {
+ assert(reg.file != BRW_IMMEDIATE_VALUE);
+ assert(reg.address_mode == BRW_ADDRESS_DIRECT);
+ assert(reg.subnr == 0);
+ assert(brw_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1 ||
+ (reg.hstride == BRW_HORIZONTAL_STRIDE_1 &&
+ reg.vstride == reg.width + 1));
+ assert(!reg.negate && !reg.abs);
+ brw_inst_set_send_src0_reg_file(devinfo, inst, reg.file);
+ brw_inst_set_src0_da_reg_nr(devinfo, inst, reg.nr);
+
+ } else if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDS ||
+ brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDSC) {
+ assert(reg.file == BRW_GENERAL_REGISTER_FILE);
+ assert(reg.address_mode == BRW_ADDRESS_DIRECT);
+ assert(reg.subnr % 16 == 0);
+ assert(reg.hstride == BRW_HORIZONTAL_STRIDE_1 &&
+ reg.vstride == reg.width + 1);
+ assert(!reg.negate && !reg.abs);
+ brw_inst_set_src0_da_reg_nr(devinfo, inst, reg.nr);
+ brw_inst_set_src0_da16_subreg_nr(devinfo, inst, reg.subnr / 16);
} else {
- if (reg.address_mode == BRW_ADDRESS_DIRECT) {
- brw_inst_set_src0_da_reg_nr(devinfo, inst, reg.nr);
- if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
- brw_inst_set_src0_da1_subreg_nr(devinfo, inst, reg.subnr);
- } else {
- brw_inst_set_src0_da16_subreg_nr(devinfo, inst, reg.subnr / 16);
- }
+ brw_inst_set_src0_file_type(devinfo, inst, reg.file, reg.type);
+ brw_inst_set_src0_abs(devinfo, inst, reg.abs);
+ brw_inst_set_src0_negate(devinfo, inst, reg.negate);
+ brw_inst_set_src0_address_mode(devinfo, inst, reg.address_mode);
+
+ if (reg.file == BRW_IMMEDIATE_VALUE) {
+ if (reg.type == BRW_REGISTER_TYPE_DF ||
+ brw_inst_opcode(devinfo, inst) == BRW_OPCODE_DIM)
+ brw_inst_set_imm_df(devinfo, inst, reg.df);
+ else if (reg.type == BRW_REGISTER_TYPE_UQ ||
+ reg.type == BRW_REGISTER_TYPE_Q)
+ brw_inst_set_imm_uq(devinfo, inst, reg.u64);
+ else
+ brw_inst_set_imm_ud(devinfo, inst, reg.ud);
+
+ if (devinfo->gen < 12 && type_sz(reg.type) < 8) {
+ brw_inst_set_src1_reg_file(devinfo, inst,
+ BRW_ARCHITECTURE_REGISTER_FILE);
+ brw_inst_set_src1_reg_hw_type(devinfo, inst,
+ brw_inst_src0_reg_hw_type(devinfo, inst));
+ }
} else {
- brw_inst_set_src0_ia_subreg_nr(devinfo, inst, reg.subnr);
+ if (reg.address_mode == BRW_ADDRESS_DIRECT) {
+ brw_inst_set_src0_da_reg_nr(devinfo, inst, reg.nr);
+ if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
+ brw_inst_set_src0_da1_subreg_nr(devinfo, inst, reg.subnr);
+ } else {
+ brw_inst_set_src0_da16_subreg_nr(devinfo, inst, reg.subnr / 16);
+ }
+ } else {
+ brw_inst_set_src0_ia_subreg_nr(devinfo, inst, reg.subnr);
- if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
- brw_inst_set_src0_ia1_addr_imm(devinfo, inst, reg.indirect_offset);
- } else {
- brw_inst_set_src0_ia16_addr_imm(devinfo, inst, reg.indirect_offset);
- }
- }
+ if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
+ brw_inst_set_src0_ia1_addr_imm(devinfo, inst, reg.indirect_offset);
+ } else {
+ brw_inst_set_src0_ia16_addr_imm(devinfo, inst, reg.indirect_offset);
+ }
+ }
- if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
- if (reg.width == BRW_WIDTH_1 &&
- brw_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1) {
- brw_inst_set_src0_hstride(devinfo, inst, BRW_HORIZONTAL_STRIDE_0);
- brw_inst_set_src0_width(devinfo, inst, BRW_WIDTH_1);
- brw_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_0);
- } else {
- brw_inst_set_src0_hstride(devinfo, inst, reg.hstride);
- brw_inst_set_src0_width(devinfo, inst, reg.width);
- brw_inst_set_src0_vstride(devinfo, inst, reg.vstride);
- }
- } else {
- brw_inst_set_src0_da16_swiz_x(devinfo, inst,
- BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_X));
- brw_inst_set_src0_da16_swiz_y(devinfo, inst,
- BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Y));
- brw_inst_set_src0_da16_swiz_z(devinfo, inst,
- BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Z));
- brw_inst_set_src0_da16_swiz_w(devinfo, inst,
- BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_W));
-
- if (reg.vstride == BRW_VERTICAL_STRIDE_8) {
- /* This is an oddity of the fact we're using the same
- * descriptions for registers in align_16 as align_1:
- */
- brw_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4);
- } else if (devinfo->gen == 7 && !devinfo->is_haswell &&
- reg.type == BRW_REGISTER_TYPE_DF &&
- reg.vstride == BRW_VERTICAL_STRIDE_2) {
- /* From SNB PRM:
- *
- * "For Align16 access mode, only encodings of 0000 and 0011
- * are allowed. Other codes are reserved."
- *
- * Presumably the DevSNB behavior applies to IVB as well.
- */
- brw_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4);
+ if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
+ if (reg.width == BRW_WIDTH_1 &&
+ brw_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1) {
+ brw_inst_set_src0_hstride(devinfo, inst, BRW_HORIZONTAL_STRIDE_0);
+ brw_inst_set_src0_width(devinfo, inst, BRW_WIDTH_1);
+ brw_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_0);
+ } else {
+ brw_inst_set_src0_hstride(devinfo, inst, reg.hstride);
+ brw_inst_set_src0_width(devinfo, inst, reg.width);
+ brw_inst_set_src0_vstride(devinfo, inst, reg.vstride);
+ }
} else {
- brw_inst_set_src0_vstride(devinfo, inst, reg.vstride);
+ brw_inst_set_src0_da16_swiz_x(devinfo, inst,
+ BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_X));
+ brw_inst_set_src0_da16_swiz_y(devinfo, inst,
+ BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Y));
+ brw_inst_set_src0_da16_swiz_z(devinfo, inst,
+ BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Z));
+ brw_inst_set_src0_da16_swiz_w(devinfo, inst,
+ BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_W));
+
+ if (reg.vstride == BRW_VERTICAL_STRIDE_8) {
+ /* This is an oddity of the fact we're using the same
+ * descriptions for registers in align_16 as align_1:
+ */
+ brw_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4);
+ } else if (devinfo->gen == 7 && !devinfo->is_haswell &&
+ reg.type == BRW_REGISTER_TYPE_DF &&
+ reg.vstride == BRW_VERTICAL_STRIDE_2) {
+ /* From SNB PRM:
+ *
+ * "For Align16 access mode, only encodings of 0000 and 0011
+ * are allowed. Other codes are reserved."
+ *
+ * Presumably the DevSNB behavior applies to IVB as well.
+ */
+ brw_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4);
+ } else {
+ brw_inst_set_src0_vstride(devinfo, inst, reg.vstride);
+ }
}
}
}
{
const struct gen_device_info *devinfo = p->devinfo;
- if (reg.file != BRW_ARCHITECTURE_REGISTER_FILE)
+ if (reg.file == BRW_GENERAL_REGISTER_FILE)
assert(reg.nr < 128);
- /* From the IVB PRM Vol. 4, Pt. 3, Section 3.3.3.5:
- *
- * "Accumulator registers may be accessed explicitly as src0
- * operands only."
- */
- assert(reg.file != BRW_ARCHITECTURE_REGISTER_FILE ||
- reg.nr != BRW_ARF_ACCUMULATOR);
-
- gen7_convert_mrf_to_grf(p, ®);
- assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
+ if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDS ||
+ brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDSC ||
+ (devinfo->gen >= 12 &&
+ (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND ||
+ brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDC))) {
+ assert(reg.file == BRW_GENERAL_REGISTER_FILE ||
+ reg.file == BRW_ARCHITECTURE_REGISTER_FILE);
+ assert(reg.address_mode == BRW_ADDRESS_DIRECT);
+ assert(reg.subnr == 0);
+ assert(brw_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1 ||
+ (reg.hstride == BRW_HORIZONTAL_STRIDE_1 &&
+ reg.vstride == reg.width + 1));
+ assert(!reg.negate && !reg.abs);
+ brw_inst_set_send_src1_reg_nr(devinfo, inst, reg.nr);
+ brw_inst_set_send_src1_reg_file(devinfo, inst, reg.file);
+ } else {
+ /* From the IVB PRM Vol. 4, Pt. 3, Section 3.3.3.5:
+ *
+ * "Accumulator registers may be accessed explicitly as src0
+ * operands only."
+ */
+ assert(reg.file != BRW_ARCHITECTURE_REGISTER_FILE ||
+ reg.nr != BRW_ARF_ACCUMULATOR);
- brw_inst_set_src1_file_type(devinfo, inst, reg.file, reg.type);
- brw_inst_set_src1_abs(devinfo, inst, reg.abs);
- brw_inst_set_src1_negate(devinfo, inst, reg.negate);
+ gen7_convert_mrf_to_grf(p, ®);
+ assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
- /* Only src1 can be immediate in two-argument instructions.
- */
- assert(brw_inst_src0_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE);
+ brw_inst_set_src1_file_type(devinfo, inst, reg.file, reg.type);
+ brw_inst_set_src1_abs(devinfo, inst, reg.abs);
+ brw_inst_set_src1_negate(devinfo, inst, reg.negate);
- if (reg.file == BRW_IMMEDIATE_VALUE) {
- /* two-argument instructions can only use 32-bit immediates */
- assert(type_sz(reg.type) < 8);
- brw_inst_set_imm_ud(devinfo, inst, reg.ud);
- } else {
- /* This is a hardware restriction, which may or may not be lifted
- * in the future:
+ /* Only src1 can be immediate in two-argument instructions.
*/
- assert (reg.address_mode == BRW_ADDRESS_DIRECT);
- /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
+ assert(brw_inst_src0_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE);
- brw_inst_set_src1_da_reg_nr(devinfo, inst, reg.nr);
- if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
- brw_inst_set_src1_da1_subreg_nr(devinfo, inst, reg.subnr);
+ if (reg.file == BRW_IMMEDIATE_VALUE) {
+ /* two-argument instructions can only use 32-bit immediates */
+ assert(type_sz(reg.type) < 8);
+ brw_inst_set_imm_ud(devinfo, inst, reg.ud);
} else {
- brw_inst_set_src1_da16_subreg_nr(devinfo, inst, reg.subnr / 16);
- }
+ /* This is a hardware restriction, which may or may not be lifted
+ * in the future:
+ */
+ assert (reg.address_mode == BRW_ADDRESS_DIRECT);
+ /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
- if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
- if (reg.width == BRW_WIDTH_1 &&
- brw_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1) {
- brw_inst_set_src1_hstride(devinfo, inst, BRW_HORIZONTAL_STRIDE_0);
- brw_inst_set_src1_width(devinfo, inst, BRW_WIDTH_1);
- brw_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_0);
- } else {
- brw_inst_set_src1_hstride(devinfo, inst, reg.hstride);
- brw_inst_set_src1_width(devinfo, inst, reg.width);
- brw_inst_set_src1_vstride(devinfo, inst, reg.vstride);
- }
- } else {
- brw_inst_set_src1_da16_swiz_x(devinfo, inst,
- BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_X));
- brw_inst_set_src1_da16_swiz_y(devinfo, inst,
- BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Y));
- brw_inst_set_src1_da16_swiz_z(devinfo, inst,
- BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Z));
- brw_inst_set_src1_da16_swiz_w(devinfo, inst,
- BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_W));
-
- if (reg.vstride == BRW_VERTICAL_STRIDE_8) {
- /* This is an oddity of the fact we're using the same
- * descriptions for registers in align_16 as align_1:
- */
- brw_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4);
- } else if (devinfo->gen == 7 && !devinfo->is_haswell &&
- reg.type == BRW_REGISTER_TYPE_DF &&
- reg.vstride == BRW_VERTICAL_STRIDE_2) {
- /* From SNB PRM:
- *
- * "For Align16 access mode, only encodings of 0000 and 0011
- * are allowed. Other codes are reserved."
- *
- * Presumably the DevSNB behavior applies to IVB as well.
- */
- brw_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4);
+ brw_inst_set_src1_da_reg_nr(devinfo, inst, reg.nr);
+ if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
+ brw_inst_set_src1_da1_subreg_nr(devinfo, inst, reg.subnr);
} else {
- brw_inst_set_src1_vstride(devinfo, inst, reg.vstride);
+ brw_inst_set_src1_da16_subreg_nr(devinfo, inst, reg.subnr / 16);
+ }
+
+ if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
+ if (reg.width == BRW_WIDTH_1 &&
+ brw_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1) {
+ brw_inst_set_src1_hstride(devinfo, inst, BRW_HORIZONTAL_STRIDE_0);
+ brw_inst_set_src1_width(devinfo, inst, BRW_WIDTH_1);
+ brw_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_0);
+ } else {
+ brw_inst_set_src1_hstride(devinfo, inst, reg.hstride);
+ brw_inst_set_src1_width(devinfo, inst, reg.width);
+ brw_inst_set_src1_vstride(devinfo, inst, reg.vstride);
+ }
+ } else {
+ brw_inst_set_src1_da16_swiz_x(devinfo, inst,
+ BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_X));
+ brw_inst_set_src1_da16_swiz_y(devinfo, inst,
+ BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Y));
+ brw_inst_set_src1_da16_swiz_z(devinfo, inst,
+ BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Z));
+ brw_inst_set_src1_da16_swiz_w(devinfo, inst,
+ BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_W));
+
+ if (reg.vstride == BRW_VERTICAL_STRIDE_8) {
+ /* This is an oddity of the fact we're using the same
+ * descriptions for registers in align_16 as align_1:
+ */
+ brw_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4);
+ } else if (devinfo->gen == 7 && !devinfo->is_haswell &&
+ reg.type == BRW_REGISTER_TYPE_DF &&
+ reg.vstride == BRW_VERTICAL_STRIDE_2) {
+ /* From SNB PRM:
+ *
+ * "For Align16 access mode, only encodings of 0000 and 0011
+ * are allowed. Other codes are reserved."
+ *
+ * Presumably the DevSNB behavior applies to IVB as well.
+ */
+ brw_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4);
+ } else {
+ brw_inst_set_src1_vstride(devinfo, inst, reg.vstride);
+ }
}
}
}
}
/**
- * Set the Message Descriptor and Extended Message Descriptor fields
- * for SEND messages.
- *
- * \note This zeroes out the Function Control bits, so it must be called
- * \b before filling out any message-specific data. Callers can
- * choose not to fill in irrelevant bits; they will be zero.
+ * Specify the descriptor and extended descriptor immediate for a SEND(C)
+ * message instruction.
*/
void
-brw_set_message_descriptor(struct brw_codegen *p,
- brw_inst *inst,
- enum brw_message_target sfid,
- unsigned msg_length,
- unsigned response_length,
- bool header_present,
- bool end_of_thread)
+brw_set_desc_ex(struct brw_codegen *p, brw_inst *inst,
+ unsigned desc, unsigned ex_desc)
{
const struct gen_device_info *devinfo = p->devinfo;
-
- brw_set_src1(p, inst, brw_imm_d(0));
-
- /* For indirect sends, `inst` will not be the SEND/SENDC instruction
- * itself; instead, it will be a MOV/OR into the address register.
- *
- * In this case, we avoid setting the extended message descriptor bits,
- * since they go on the later SEND/SENDC instead and if set here would
- * instead clobber the conditionalmod bits.
- */
- unsigned opcode = brw_inst_opcode(devinfo, inst);
- if (opcode == BRW_OPCODE_SEND || opcode == BRW_OPCODE_SENDC) {
- brw_inst_set_sfid(devinfo, inst, sfid);
- }
-
- brw_inst_set_mlen(devinfo, inst, msg_length);
- brw_inst_set_rlen(devinfo, inst, response_length);
- brw_inst_set_eot(devinfo, inst, end_of_thread);
-
- if (devinfo->gen >= 5) {
- brw_inst_set_header_present(devinfo, inst, header_present);
- }
+ assert(brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND ||
+ brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDC);
+ if (devinfo->gen < 12)
+ brw_inst_set_src1_file_type(devinfo, inst,
+ BRW_IMMEDIATE_VALUE, BRW_REGISTER_TYPE_UD);
+ brw_inst_set_send_desc(devinfo, inst, desc);
+ if (devinfo->gen >= 9)
+ brw_inst_set_send_ex_desc(devinfo, inst, ex_desc);
}
static void brw_set_math_message( struct brw_codegen *p,
break;
}
+ brw_set_desc(p, inst, brw_message_desc(
+ devinfo, msg_length, response_length, false));
- brw_set_message_descriptor(p, inst, BRW_SFID_MATH,
- msg_length, response_length, false, false);
+ brw_inst_set_sfid(devinfo, inst, BRW_SFID_MATH);
brw_inst_set_math_msg_function(devinfo, inst, function);
brw_inst_set_math_msg_signed_int(devinfo, inst, integer_type);
brw_inst_set_math_msg_precision(devinfo, inst, low_precision);
{
const struct gen_device_info *devinfo = p->devinfo;
- brw_set_message_descriptor(p, insn, BRW_SFID_URB,
- 1, response_length, true, end_of_thread);
+ brw_set_desc(p, insn, brw_message_desc(
+ devinfo, 1, response_length, true));
+
+ brw_inst_set_sfid(devinfo, insn, BRW_SFID_URB);
+ brw_inst_set_eot(devinfo, insn, end_of_thread);
brw_inst_set_urb_opcode(devinfo, insn, 1); /* FF_SYNC */
brw_inst_set_urb_allocate(devinfo, insn, allocate);
/* The following fields are not used by FF_SYNC: */
assert(devinfo->gen < 7 || !(flags & BRW_URB_WRITE_ALLOCATE));
assert(devinfo->gen >= 7 || !(flags & BRW_URB_WRITE_PER_SLOT_OFFSET));
- brw_set_message_descriptor(p, insn, BRW_SFID_URB,
- msg_length, response_length, true,
- flags & BRW_URB_WRITE_EOT);
+ brw_set_desc(p, insn, brw_message_desc(
+ devinfo, msg_length, response_length, true));
+
+ brw_inst_set_sfid(devinfo, insn, BRW_SFID_URB);
+ brw_inst_set_eot(devinfo, insn, !!(flags & BRW_URB_WRITE_EOT));
if (flags & BRW_URB_WRITE_OWORD) {
assert(msg_length == 2); /* header + one OWORD of data */
}
}
-void
-brw_set_dp_write_message(struct brw_codegen *p,
- brw_inst *insn,
- unsigned binding_table_index,
- unsigned msg_control,
- unsigned msg_type,
- unsigned target_cache,
- unsigned msg_length,
- bool header_present,
- unsigned last_render_target,
- unsigned response_length,
- unsigned end_of_thread,
- unsigned send_commit_msg)
-{
- const struct gen_device_info *devinfo = p->devinfo;
- const unsigned sfid = (devinfo->gen >= 6 ? target_cache :
- BRW_SFID_DATAPORT_WRITE);
-
- brw_set_message_descriptor(p, insn, sfid, msg_length, response_length,
- header_present, end_of_thread);
-
- brw_inst_set_binding_table_index(devinfo, insn, binding_table_index);
- brw_inst_set_dp_write_msg_type(devinfo, insn, msg_type);
- brw_inst_set_dp_write_msg_control(devinfo, insn, msg_control);
- brw_inst_set_rt_last(devinfo, insn, last_render_target);
- if (devinfo->gen < 7) {
- brw_inst_set_dp_write_commit(devinfo, insn, send_commit_msg);
- }
-}
-
-void
-brw_set_dp_read_message(struct brw_codegen *p,
- brw_inst *insn,
- unsigned binding_table_index,
- unsigned msg_control,
- unsigned msg_type,
- unsigned target_cache,
- unsigned msg_length,
- bool header_present,
- unsigned response_length)
-{
- const struct gen_device_info *devinfo = p->devinfo;
- const unsigned sfid = (devinfo->gen >= 6 ? target_cache :
- BRW_SFID_DATAPORT_READ);
-
- brw_set_message_descriptor(p, insn, sfid, msg_length, response_length,
- header_present, false);
-
- brw_inst_set_binding_table_index(devinfo, insn, binding_table_index);
- brw_inst_set_dp_read_msg_type(devinfo, insn, msg_type);
- brw_inst_set_dp_read_msg_control(devinfo, insn, msg_control);
- if (devinfo->gen < 6)
- brw_inst_set_dp_read_target_cache(devinfo, insn, target_cache);
-}
-
-void
-brw_set_sampler_message(struct brw_codegen *p,
- brw_inst *inst,
- unsigned binding_table_index,
- unsigned sampler,
- unsigned msg_type,
- unsigned response_length,
- unsigned msg_length,
- unsigned header_present,
- unsigned simd_mode,
- unsigned return_format)
-{
- const struct gen_device_info *devinfo = p->devinfo;
-
- brw_set_message_descriptor(p, inst, BRW_SFID_SAMPLER, msg_length,
- response_length, header_present, false);
-
- brw_inst_set_binding_table_index(devinfo, inst, binding_table_index);
- brw_inst_set_sampler(devinfo, inst, sampler);
- brw_inst_set_sampler_msg_type(devinfo, inst, msg_type);
- if (devinfo->gen >= 5) {
- brw_inst_set_sampler_simd_mode(devinfo, inst, simd_mode);
- } else if (devinfo->gen == 4 && !devinfo->is_g4x) {
- brw_inst_set_sampler_return_format(devinfo, inst, return_format);
- }
-}
-
static void
gen7_set_dp_scratch_message(struct brw_codegen *p,
brw_inst *inst,
const unsigned block_size = (devinfo->gen >= 8 ? _mesa_logbase2(num_regs) :
num_regs - 1);
- brw_set_message_descriptor(p, inst, GEN7_SFID_DATAPORT_DATA_CACHE,
- mlen, rlen, header_present, false);
+ brw_set_desc(p, inst, brw_message_desc(
+ devinfo, mlen, rlen, header_present));
+
+ brw_inst_set_sfid(devinfo, inst, GEN7_SFID_DATAPORT_DATA_CACHE);
brw_inst_set_dp_category(devinfo, inst, 1); /* Scratch Block Read/Write msgs */
brw_inst_set_scratch_read_write(devinfo, inst, write);
brw_inst_set_scratch_type(devinfo, inst, dword);
brw_inst_set_scratch_addr_offset(devinfo, inst, addr_offset);
}
+static void
+brw_inst_set_state(const struct gen_device_info *devinfo,
+ brw_inst *insn,
+ const struct brw_insn_state *state)
+{
+ brw_inst_set_exec_size(devinfo, insn, state->exec_size);
+ brw_inst_set_group(devinfo, insn, state->group);
+ brw_inst_set_compression(devinfo, insn, state->compressed);
+ brw_inst_set_access_mode(devinfo, insn, state->access_mode);
+ brw_inst_set_mask_control(devinfo, insn, state->mask_control);
+ brw_inst_set_saturate(devinfo, insn, state->saturate);
+ brw_inst_set_pred_control(devinfo, insn, state->predicate);
+ brw_inst_set_pred_inv(devinfo, insn, state->pred_inv);
+
+ if (is_3src(devinfo, brw_inst_opcode(devinfo, insn)) &&
+ state->access_mode == BRW_ALIGN_16) {
+ brw_inst_set_3src_a16_flag_subreg_nr(devinfo, insn, state->flag_subreg % 2);
+ if (devinfo->gen >= 7)
+ brw_inst_set_3src_a16_flag_reg_nr(devinfo, insn, state->flag_subreg / 2);
+ } else {
+ brw_inst_set_flag_subreg_nr(devinfo, insn, state->flag_subreg % 2);
+ if (devinfo->gen >= 7)
+ brw_inst_set_flag_reg_nr(devinfo, insn, state->flag_subreg / 2);
+ }
+
+ if (devinfo->gen >= 6)
+ brw_inst_set_acc_wr_control(devinfo, insn, state->acc_wr_control);
+}
+
#define next_insn brw_next_insn
brw_inst *
brw_next_insn(struct brw_codegen *p, unsigned opcode)
p->next_insn_offset += 16;
insn = &p->store[p->nr_insn++];
- memcpy(insn, p->current, sizeof(*insn));
+ memset(insn, 0, sizeof(*insn));
brw_inst_set_opcode(devinfo, insn, opcode);
+
+ /* Apply the default instruction state */
+ brw_inst_set_state(devinfo, insn, p->current);
+
return insn;
}
return reg.subnr / 4;
}
+static enum gen10_align1_3src_vertical_stride
+to_3src_align1_vstride(const struct gen_device_info *devinfo,
+ enum brw_vertical_stride vstride)
+{
+ switch (vstride) {
+ case BRW_VERTICAL_STRIDE_0:
+ return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_0;
+ case BRW_VERTICAL_STRIDE_1:
+ assert(devinfo->gen >= 12);
+ return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_1;
+ case BRW_VERTICAL_STRIDE_2:
+ assert(devinfo->gen < 12);
+ return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_2;
+ case BRW_VERTICAL_STRIDE_4:
+ return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_4;
+ case BRW_VERTICAL_STRIDE_8:
+ case BRW_VERTICAL_STRIDE_16:
+ return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_8;
+ default:
+ unreachable("invalid vstride");
+ }
+}
+
+
+static enum gen10_align1_3src_src_horizontal_stride
+to_3src_align1_hstride(enum brw_horizontal_stride hstride)
+{
+ switch (hstride) {
+ case BRW_HORIZONTAL_STRIDE_0:
+ return BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_0;
+ case BRW_HORIZONTAL_STRIDE_1:
+ return BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_1;
+ case BRW_HORIZONTAL_STRIDE_2:
+ return BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_2;
+ case BRW_HORIZONTAL_STRIDE_4:
+ return BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_4;
+ default:
+ unreachable("invalid hstride");
+ }
+}
+
static brw_inst *
brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest,
struct brw_reg src0, struct brw_reg src1, struct brw_reg src2)
gen7_convert_mrf_to_grf(p, &dest);
- assert(brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16);
-
- assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
- dest.file == BRW_MESSAGE_REGISTER_FILE);
assert(dest.nr < 128);
+ assert(src0.file == BRW_IMMEDIATE_VALUE || src0.nr < 128);
+ assert(src1.file != BRW_IMMEDIATE_VALUE && src1.nr < 128);
+ assert(src2.file == BRW_IMMEDIATE_VALUE || src2.nr < 128);
assert(dest.address_mode == BRW_ADDRESS_DIRECT);
- assert(dest.type == BRW_REGISTER_TYPE_F ||
- dest.type == BRW_REGISTER_TYPE_DF ||
- dest.type == BRW_REGISTER_TYPE_D ||
- dest.type == BRW_REGISTER_TYPE_UD);
- if (devinfo->gen == 6) {
- brw_inst_set_3src_a16_dst_reg_file(devinfo, inst,
- dest.file == BRW_MESSAGE_REGISTER_FILE);
- }
- brw_inst_set_3src_dst_reg_nr(devinfo, inst, dest.nr);
- brw_inst_set_3src_a16_dst_subreg_nr(devinfo, inst, dest.subnr / 16);
- brw_inst_set_3src_a16_dst_writemask(devinfo, inst, dest.writemask);
-
- assert(src0.file == BRW_GENERAL_REGISTER_FILE);
assert(src0.address_mode == BRW_ADDRESS_DIRECT);
- assert(src0.nr < 128);
- brw_inst_set_3src_a16_src0_swizzle(devinfo, inst, src0.swizzle);
- brw_inst_set_3src_a16_src0_subreg_nr(devinfo, inst, get_3src_subreg_nr(src0));
- brw_inst_set_3src_src0_reg_nr(devinfo, inst, src0.nr);
- brw_inst_set_3src_src0_abs(devinfo, inst, src0.abs);
- brw_inst_set_3src_src0_negate(devinfo, inst, src0.negate);
- brw_inst_set_3src_a16_src0_rep_ctrl(devinfo, inst,
- src0.vstride == BRW_VERTICAL_STRIDE_0);
-
- assert(src1.file == BRW_GENERAL_REGISTER_FILE);
assert(src1.address_mode == BRW_ADDRESS_DIRECT);
- assert(src1.nr < 128);
- brw_inst_set_3src_a16_src1_swizzle(devinfo, inst, src1.swizzle);
- brw_inst_set_3src_a16_src1_subreg_nr(devinfo, inst, get_3src_subreg_nr(src1));
- brw_inst_set_3src_src1_reg_nr(devinfo, inst, src1.nr);
- brw_inst_set_3src_src1_abs(devinfo, inst, src1.abs);
- brw_inst_set_3src_src1_negate(devinfo, inst, src1.negate);
- brw_inst_set_3src_a16_src1_rep_ctrl(devinfo, inst,
- src1.vstride == BRW_VERTICAL_STRIDE_0);
-
- assert(src2.file == BRW_GENERAL_REGISTER_FILE);
assert(src2.address_mode == BRW_ADDRESS_DIRECT);
- assert(src2.nr < 128);
- brw_inst_set_3src_a16_src2_swizzle(devinfo, inst, src2.swizzle);
- brw_inst_set_3src_a16_src2_subreg_nr(devinfo, inst, get_3src_subreg_nr(src2));
- brw_inst_set_3src_src2_reg_nr(devinfo, inst, src2.nr);
- brw_inst_set_3src_src2_abs(devinfo, inst, src2.abs);
- brw_inst_set_3src_src2_negate(devinfo, inst, src2.negate);
- brw_inst_set_3src_a16_src2_rep_ctrl(devinfo, inst,
- src2.vstride == BRW_VERTICAL_STRIDE_0);
-
- if (devinfo->gen >= 7) {
- /* Set both the source and destination types based on dest.type,
- * ignoring the source register types. The MAD and LRP emitters ensure
- * that all four types are float. The BFE and BFI2 emitters, however,
- * may send us mixed D and UD types and want us to ignore that and use
- * the destination type.
- */
- switch (dest.type) {
- case BRW_REGISTER_TYPE_F:
- brw_inst_set_3src_a16_src_hw_type(devinfo, inst, BRW_3SRC_TYPE_F);
- brw_inst_set_3src_a16_dst_hw_type(devinfo, inst, BRW_3SRC_TYPE_F);
- break;
- case BRW_REGISTER_TYPE_DF:
- brw_inst_set_3src_a16_src_hw_type(devinfo, inst, BRW_3SRC_TYPE_DF);
- brw_inst_set_3src_a16_dst_hw_type(devinfo, inst, BRW_3SRC_TYPE_DF);
- break;
- case BRW_REGISTER_TYPE_D:
- brw_inst_set_3src_a16_src_hw_type(devinfo, inst, BRW_3SRC_TYPE_D);
- brw_inst_set_3src_a16_dst_hw_type(devinfo, inst, BRW_3SRC_TYPE_D);
- break;
- case BRW_REGISTER_TYPE_UD:
- brw_inst_set_3src_a16_src_hw_type(devinfo, inst, BRW_3SRC_TYPE_UD);
- brw_inst_set_3src_a16_dst_hw_type(devinfo, inst, BRW_3SRC_TYPE_UD);
- break;
- default:
- unreachable("not reached");
+
+ if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
+ assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
+ dest.file == BRW_ARCHITECTURE_REGISTER_FILE);
+
+ if (devinfo->gen >= 12) {
+ brw_inst_set_3src_a1_dst_reg_file(devinfo, inst, dest.file);
+ brw_inst_set_3src_dst_reg_nr(devinfo, inst, dest.nr);
+ } else {
+ if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE) {
+ brw_inst_set_3src_a1_dst_reg_file(devinfo, inst,
+ BRW_ALIGN1_3SRC_ACCUMULATOR);
+ brw_inst_set_3src_dst_reg_nr(devinfo, inst, BRW_ARF_ACCUMULATOR);
+ } else {
+ brw_inst_set_3src_a1_dst_reg_file(devinfo, inst,
+ BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE);
+ brw_inst_set_3src_dst_reg_nr(devinfo, inst, dest.nr);
+ }
+ }
+ brw_inst_set_3src_a1_dst_subreg_nr(devinfo, inst, dest.subnr / 8);
+
+ brw_inst_set_3src_a1_dst_hstride(devinfo, inst, BRW_ALIGN1_3SRC_DST_HORIZONTAL_STRIDE_1);
+
+ if (brw_reg_type_is_floating_point(dest.type)) {
+ brw_inst_set_3src_a1_exec_type(devinfo, inst,
+ BRW_ALIGN1_3SRC_EXEC_TYPE_FLOAT);
+ } else {
+ brw_inst_set_3src_a1_exec_type(devinfo, inst,
+ BRW_ALIGN1_3SRC_EXEC_TYPE_INT);
+ }
+
+ brw_inst_set_3src_a1_dst_type(devinfo, inst, dest.type);
+ brw_inst_set_3src_a1_src0_type(devinfo, inst, src0.type);
+ brw_inst_set_3src_a1_src1_type(devinfo, inst, src1.type);
+ brw_inst_set_3src_a1_src2_type(devinfo, inst, src2.type);
+
+ brw_inst_set_3src_a1_src0_vstride(
+ devinfo, inst, to_3src_align1_vstride(devinfo, src0.vstride));
+ brw_inst_set_3src_a1_src1_vstride(
+ devinfo, inst, to_3src_align1_vstride(devinfo, src1.vstride));
+ /* no vstride on src2 */
+
+ brw_inst_set_3src_a1_src0_hstride(devinfo, inst,
+ to_3src_align1_hstride(src0.hstride));
+ brw_inst_set_3src_a1_src1_hstride(devinfo, inst,
+ to_3src_align1_hstride(src1.hstride));
+ brw_inst_set_3src_a1_src2_hstride(devinfo, inst,
+ to_3src_align1_hstride(src2.hstride));
+
+ brw_inst_set_3src_a1_src0_subreg_nr(devinfo, inst, src0.subnr);
+ if (src0.type == BRW_REGISTER_TYPE_NF) {
+ brw_inst_set_3src_src0_reg_nr(devinfo, inst, BRW_ARF_ACCUMULATOR);
+ } else {
+ brw_inst_set_3src_src0_reg_nr(devinfo, inst, src0.nr);
+ }
+ brw_inst_set_3src_src0_abs(devinfo, inst, src0.abs);
+ brw_inst_set_3src_src0_negate(devinfo, inst, src0.negate);
+
+ brw_inst_set_3src_a1_src1_subreg_nr(devinfo, inst, src1.subnr);
+ if (src1.file == BRW_ARCHITECTURE_REGISTER_FILE) {
+ brw_inst_set_3src_src1_reg_nr(devinfo, inst, BRW_ARF_ACCUMULATOR);
+ } else {
+ brw_inst_set_3src_src1_reg_nr(devinfo, inst, src1.nr);
+ }
+ brw_inst_set_3src_src1_abs(devinfo, inst, src1.abs);
+ brw_inst_set_3src_src1_negate(devinfo, inst, src1.negate);
+
+ brw_inst_set_3src_a1_src2_subreg_nr(devinfo, inst, src2.subnr);
+ brw_inst_set_3src_src2_reg_nr(devinfo, inst, src2.nr);
+ brw_inst_set_3src_src2_abs(devinfo, inst, src2.abs);
+ brw_inst_set_3src_src2_negate(devinfo, inst, src2.negate);
+
+ assert(src0.file == BRW_GENERAL_REGISTER_FILE ||
+ src0.file == BRW_IMMEDIATE_VALUE ||
+ (src0.file == BRW_ARCHITECTURE_REGISTER_FILE &&
+ src0.type == BRW_REGISTER_TYPE_NF));
+ assert(src1.file == BRW_GENERAL_REGISTER_FILE ||
+ src1.file == BRW_ARCHITECTURE_REGISTER_FILE);
+ assert(src2.file == BRW_GENERAL_REGISTER_FILE ||
+ src2.file == BRW_IMMEDIATE_VALUE);
+
+ if (devinfo->gen >= 12) {
+ brw_inst_set_3src_a1_src0_reg_file(devinfo, inst, src0.file);
+ brw_inst_set_3src_a1_src1_reg_file(devinfo, inst, src1.file);
+ brw_inst_set_3src_a1_src2_reg_file(devinfo, inst, src2.file);
+ } else {
+ brw_inst_set_3src_a1_src0_reg_file(devinfo, inst,
+ src0.file == BRW_GENERAL_REGISTER_FILE ?
+ BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE :
+ BRW_ALIGN1_3SRC_IMMEDIATE_VALUE);
+ brw_inst_set_3src_a1_src1_reg_file(devinfo, inst,
+ src1.file == BRW_GENERAL_REGISTER_FILE ?
+ BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE :
+ BRW_ALIGN1_3SRC_ACCUMULATOR);
+ brw_inst_set_3src_a1_src2_reg_file(devinfo, inst,
+ src2.file == BRW_GENERAL_REGISTER_FILE ?
+ BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE :
+ BRW_ALIGN1_3SRC_IMMEDIATE_VALUE);
+ }
+
+ } else {
+ assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
+ dest.file == BRW_MESSAGE_REGISTER_FILE);
+ assert(dest.type == BRW_REGISTER_TYPE_F ||
+ dest.type == BRW_REGISTER_TYPE_DF ||
+ dest.type == BRW_REGISTER_TYPE_D ||
+ dest.type == BRW_REGISTER_TYPE_UD ||
+ (dest.type == BRW_REGISTER_TYPE_HF && devinfo->gen >= 8));
+ if (devinfo->gen == 6) {
+ brw_inst_set_3src_a16_dst_reg_file(devinfo, inst,
+ dest.file == BRW_MESSAGE_REGISTER_FILE);
+ }
+ brw_inst_set_3src_dst_reg_nr(devinfo, inst, dest.nr);
+ brw_inst_set_3src_a16_dst_subreg_nr(devinfo, inst, dest.subnr / 16);
+ brw_inst_set_3src_a16_dst_writemask(devinfo, inst, dest.writemask);
+
+ assert(src0.file == BRW_GENERAL_REGISTER_FILE);
+ brw_inst_set_3src_a16_src0_swizzle(devinfo, inst, src0.swizzle);
+ brw_inst_set_3src_a16_src0_subreg_nr(devinfo, inst, get_3src_subreg_nr(src0));
+ brw_inst_set_3src_src0_reg_nr(devinfo, inst, src0.nr);
+ brw_inst_set_3src_src0_abs(devinfo, inst, src0.abs);
+ brw_inst_set_3src_src0_negate(devinfo, inst, src0.negate);
+ brw_inst_set_3src_a16_src0_rep_ctrl(devinfo, inst,
+ src0.vstride == BRW_VERTICAL_STRIDE_0);
+
+ assert(src1.file == BRW_GENERAL_REGISTER_FILE);
+ brw_inst_set_3src_a16_src1_swizzle(devinfo, inst, src1.swizzle);
+ brw_inst_set_3src_a16_src1_subreg_nr(devinfo, inst, get_3src_subreg_nr(src1));
+ brw_inst_set_3src_src1_reg_nr(devinfo, inst, src1.nr);
+ brw_inst_set_3src_src1_abs(devinfo, inst, src1.abs);
+ brw_inst_set_3src_src1_negate(devinfo, inst, src1.negate);
+ brw_inst_set_3src_a16_src1_rep_ctrl(devinfo, inst,
+ src1.vstride == BRW_VERTICAL_STRIDE_0);
+
+ assert(src2.file == BRW_GENERAL_REGISTER_FILE);
+ brw_inst_set_3src_a16_src2_swizzle(devinfo, inst, src2.swizzle);
+ brw_inst_set_3src_a16_src2_subreg_nr(devinfo, inst, get_3src_subreg_nr(src2));
+ brw_inst_set_3src_src2_reg_nr(devinfo, inst, src2.nr);
+ brw_inst_set_3src_src2_abs(devinfo, inst, src2.abs);
+ brw_inst_set_3src_src2_negate(devinfo, inst, src2.negate);
+ brw_inst_set_3src_a16_src2_rep_ctrl(devinfo, inst,
+ src2.vstride == BRW_VERTICAL_STRIDE_0);
+
+ if (devinfo->gen >= 7) {
+ /* Set both the source and destination types based on dest.type,
+ * ignoring the source register types. The MAD and LRP emitters ensure
+ * that all four types are float. The BFE and BFI2 emitters, however,
+ * may send us mixed D and UD types and want us to ignore that and use
+ * the destination type.
+ */
+ brw_inst_set_3src_a16_src_type(devinfo, inst, dest.type);
+ brw_inst_set_3src_a16_dst_type(devinfo, inst, dest.type);
+
+ /* From the Bspec, 3D Media GPGPU, Instruction fields, srcType:
+ *
+ * "Three source instructions can use operands with mixed-mode
+ * precision. When SrcType field is set to :f or :hf it defines
+ * precision for source 0 only, and fields Src1Type and Src2Type
+ * define precision for other source operands:
+ *
+ * 0b = :f. Single precision Float (32-bit).
+ * 1b = :hf. Half precision Float (16-bit)."
+ */
+ if (src1.type == BRW_REGISTER_TYPE_HF)
+ brw_inst_set_3src_a16_src1_type(devinfo, inst, 1);
+
+ if (src2.type == BRW_REGISTER_TYPE_HF)
+ brw_inst_set_3src_a16_src2_type(devinfo, inst, 1);
}
}
struct brw_reg src0, \
struct brw_reg src1, \
struct brw_reg src2) \
-{ \
+{ \
+ if (p->current->access_mode == BRW_ALIGN_16) { \
+ if (src0.vstride == BRW_VERTICAL_STRIDE_0) \
+ src0.swizzle = BRW_SWIZZLE_XXXX; \
+ if (src1.vstride == BRW_VERTICAL_STRIDE_0) \
+ src1.swizzle = BRW_SWIZZLE_XXXX; \
+ if (src2.vstride == BRW_VERTICAL_STRIDE_0) \
+ src2.swizzle = BRW_SWIZZLE_XXXX; \
+ } \
return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2); \
}
assert(src0.type == BRW_REGISTER_TYPE_DF); \
assert(src1.type == BRW_REGISTER_TYPE_DF); \
assert(src2.type == BRW_REGISTER_TYPE_DF); \
+ } \
+ \
+ if (p->current->access_mode == BRW_ALIGN_16) { \
+ if (src0.vstride == BRW_VERTICAL_STRIDE_0) \
+ src0.swizzle = BRW_SWIZZLE_XXXX; \
+ if (src1.vstride == BRW_VERTICAL_STRIDE_0) \
+ src1.swizzle = BRW_SWIZZLE_XXXX; \
+ if (src2.vstride == BRW_VERTICAL_STRIDE_0) \
+ src2.swizzle = BRW_SWIZZLE_XXXX; \
} \
return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2); \
}
ALU2(SHL)
ALU1(DIM)
ALU2(ASR)
+ALU2(ROL)
+ALU2(ROR)
+ALU3(CSEL)
ALU1(FRC)
ALU1(RNDD)
ALU2(MAC)
ALU2(DPH)
ALU2(DP3)
ALU2(DP2)
-ALU3F(MAD)
+ALU3(MAD)
ALU3F(LRP)
ALU1(BFREV)
ALU3(BFE)
const struct gen_device_info *devinfo = p->devinfo;
/* When converting F->DF on IVB/BYT, every odd source channel is ignored.
- * To avoid the problems that causes, we use a <1,2,0> source region to read
- * each element twice.
+ * To avoid the problems that causes, we use an <X,2,0> source region to
+ * read each element twice.
*/
if (devinfo->gen == 7 && !devinfo->is_haswell &&
- brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1 &&
+ brw_get_default_access_mode(p) == BRW_ALIGN_1 &&
dest.type == BRW_REGISTER_TYPE_DF &&
(src0.type == BRW_REGISTER_TYPE_F ||
src0.type == BRW_REGISTER_TYPE_D ||
src0.type == BRW_REGISTER_TYPE_UD) &&
!has_scalar_region(src0)) {
- assert(src0.vstride == BRW_VERTICAL_STRIDE_4 &&
- src0.width == BRW_WIDTH_4 &&
- src0.hstride == BRW_HORIZONTAL_STRIDE_1);
-
- src0.vstride = BRW_VERTICAL_STRIDE_1;
+ assert(src0.vstride == src0.width + src0.hstride);
+ src0.vstride = src0.hstride;
src0.width = BRW_WIDTH_2;
src0.hstride = BRW_HORIZONTAL_STRIDE_0;
}
brw_F32TO16(struct brw_codegen *p, struct brw_reg dst, struct brw_reg src)
{
const struct gen_device_info *devinfo = p->devinfo;
- const bool align16 = brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_16;
+ const bool align16 = brw_get_default_access_mode(p) == BRW_ALIGN_16;
/* The F32TO16 instruction doesn't support 32-bit destination types in
* Align1 mode, and neither does the Gen8 implementation in terms of a
* converting MOV. Gen7 does zero out the high 16 bits in Align16 mode as
}
if (needs_zero_fill) {
- brw_inst_set_no_dd_clear(devinfo, inst, true);
+ if (devinfo->gen < 12)
+ brw_inst_set_no_dd_clear(devinfo, inst, true);
inst = brw_MOV(p, suboffset(dst, 1), brw_imm_w(0));
- brw_inst_set_no_dd_check(devinfo, inst, true);
+ if (devinfo->gen < 12)
+ brw_inst_set_no_dd_check(devinfo, inst, true);
}
brw_pop_insn_state(p);
brw_F16TO32(struct brw_codegen *p, struct brw_reg dst, struct brw_reg src)
{
const struct gen_device_info *devinfo = p->devinfo;
- bool align16 = brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_16;
+ bool align16 = brw_get_default_access_mode(p) == BRW_ALIGN_16;
if (align16) {
assert(src.type == BRW_REGISTER_TYPE_UD);
struct brw_reg ip = brw_ip_reg();
brw_inst *inst = brw_alu2(p, BRW_OPCODE_JMPI, ip, ip, index);
- brw_inst_set_exec_size(devinfo, inst, BRW_EXECUTE_2);
+ brw_inst_set_exec_size(devinfo, inst, BRW_EXECUTE_1);
brw_inst_set_qtr_control(devinfo, inst, BRW_COMPRESSION_NONE);
brw_inst_set_mask_control(devinfo, inst, BRW_MASK_DISABLE);
brw_inst_set_pred_control(devinfo, inst, predicate_control);
brw_inst_set_uip(devinfo, insn, 0);
} else {
brw_set_dest(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
- brw_set_src0(p, insn, brw_imm_d(0));
+ if (devinfo->gen < 12)
+ brw_set_src0(p, insn, brw_imm_d(0));
brw_inst_set_jip(devinfo, insn, 0);
brw_inst_set_uip(devinfo, insn, 0);
}
insn = next_insn(p, BRW_OPCODE_IF);
brw_set_dest(p, insn, brw_imm_w(0));
- brw_inst_set_exec_size(devinfo, insn,
- brw_inst_exec_size(devinfo, p->current));
+ brw_inst_set_exec_size(devinfo, insn, brw_get_default_exec_size(p));
brw_inst_set_gen6_jump_count(devinfo, insn, 0);
brw_set_src0(p, insn, src0);
brw_set_src1(p, insn, src1);
brw_inst_set_uip(devinfo, insn, 0);
} else {
brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
- brw_set_src0(p, insn, brw_imm_d(0));
+ if (devinfo->gen < 12)
+ brw_set_src0(p, insn, brw_imm_d(0));
brw_inst_set_jip(devinfo, insn, 0);
brw_inst_set_uip(devinfo, insn, 0);
}
p->if_depth_in_loop[p->loop_stack_depth]);
}
brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
- brw_inst_set_exec_size(devinfo, insn,
- brw_inst_exec_size(devinfo, p->current));
+ brw_inst_set_exec_size(devinfo, insn, brw_get_default_exec_size(p));
return insn;
}
p->if_depth_in_loop[p->loop_stack_depth]);
}
brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
- brw_inst_set_exec_size(devinfo, insn,
- brw_inst_exec_size(devinfo, p->current));
+ brw_inst_set_exec_size(devinfo, insn, brw_get_default_exec_size(p));
return insn;
}
insn = next_insn(p, BRW_OPCODE_HALT);
brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
- if (devinfo->gen >= 8) {
- brw_set_src0(p, insn, brw_imm_d(0x0));
- } else {
+ if (devinfo->gen < 8) {
brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
brw_set_src1(p, insn, brw_imm_d(0x0)); /* UIP and JIP, updated later. */
+ } else if (devinfo->gen < 12) {
+ brw_set_src0(p, insn, brw_imm_d(0x0));
}
brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
- brw_inst_set_exec_size(devinfo, insn,
- brw_inst_exec_size(devinfo, p->current));
+ brw_inst_set_exec_size(devinfo, insn, brw_get_default_exec_size(p));
return insn;
}
if (devinfo->gen >= 8) {
brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
- brw_set_src0(p, insn, brw_imm_d(0));
+ if (devinfo->gen < 12)
+ brw_set_src0(p, insn, brw_imm_d(0));
brw_inst_set_jip(devinfo, insn, br * (do_insn - insn));
} else if (devinfo->gen == 7) {
brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
}
- brw_inst_set_exec_size(devinfo, insn,
- brw_inst_exec_size(devinfo, p->current));
+ brw_inst_set_exec_size(devinfo, insn, brw_get_default_exec_size(p));
} else {
if (p->single_program_flow) {
assert(src1.file == BRW_GENERAL_REGISTER_FILE ||
(devinfo->gen >= 8 && src1.file == BRW_IMMEDIATE_VALUE));
} else {
- assert(src0.type == BRW_REGISTER_TYPE_F);
- assert(src1.type == BRW_REGISTER_TYPE_F);
+ assert(src0.type == BRW_REGISTER_TYPE_F ||
+ (src0.type == BRW_REGISTER_TYPE_HF && devinfo->gen >= 9));
+ assert(src1.type == BRW_REGISTER_TYPE_F ||
+ (src1.type == BRW_REGISTER_TYPE_HF && devinfo->gen >= 9));
}
/* Source modifiers are ignored for extended math instructions on Gen6. */
const unsigned target_cache =
(devinfo->gen >= 7 ? GEN7_SFID_DATAPORT_DATA_CACHE :
devinfo->gen >= 6 ? GEN6_SFID_DATAPORT_RENDER_CACHE :
- BRW_DATAPORT_READ_TARGET_RENDER_CACHE);
+ BRW_SFID_DATAPORT_WRITE);
uint32_t msg_type;
if (devinfo->gen >= 6)
brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
/* set message header global offset field (reg 0, element 2) */
+ brw_set_default_exec_size(p, BRW_EXECUTE_1);
brw_MOV(p,
retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
mrf.nr,
struct brw_reg src_header = retype(brw_vec8_grf(0, 0),
BRW_REGISTER_TYPE_UW);
+ brw_inst_set_sfid(devinfo, insn, target_cache);
brw_inst_set_compression(devinfo, insn, false);
if (brw_inst_exec_size(devinfo, insn) >= 16)
else
msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
- brw_set_dp_write_message(p,
- insn,
- brw_scratch_surface_idx(p),
- BRW_DATAPORT_OWORD_BLOCK_DWORDS(num_regs * 8),
- msg_type,
- target_cache,
- mlen,
- true, /* header_present */
- 0, /* not a render target */
- send_commit_msg, /* response_length */
- 0, /* eot */
- send_commit_msg);
+ brw_set_desc(p, insn,
+ brw_message_desc(devinfo, mlen, send_commit_msg, true) |
+ brw_dp_write_desc(devinfo, brw_scratch_surface_idx(p),
+ BRW_DATAPORT_OWORD_BLOCK_DWORDS(num_regs * 8),
+ msg_type, 0, /* not a render target */
+ send_commit_msg));
}
}
const unsigned target_cache =
(devinfo->gen >= 7 ? GEN7_SFID_DATAPORT_DATA_CACHE :
devinfo->gen >= 6 ? GEN6_SFID_DATAPORT_RENDER_CACHE :
- BRW_DATAPORT_READ_TARGET_RENDER_CACHE);
+ BRW_SFID_DATAPORT_READ);
{
brw_push_insn_state(p);
brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
/* set message header global offset field (reg 0, element 2) */
+ brw_set_default_exec_size(p, BRW_EXECUTE_1);
brw_MOV(p, get_element_ud(mrf, 2), brw_imm_ud(offset));
brw_pop_insn_state(p);
{
brw_inst *insn = next_insn(p, BRW_OPCODE_SEND);
+ brw_inst_set_sfid(devinfo, insn, target_cache);
assert(brw_inst_pred_control(devinfo, insn) == 0);
brw_inst_set_compression(devinfo, insn, false);
brw_inst_set_base_mrf(devinfo, insn, mrf.nr);
}
- brw_set_dp_read_message(p,
- insn,
- brw_scratch_surface_idx(p),
- BRW_DATAPORT_OWORD_BLOCK_DWORDS(num_regs * 8),
- BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
- target_cache,
- 1, /* msg_length */
- true, /* header_present */
- rlen);
+ brw_set_desc(p, insn,
+ brw_message_desc(devinfo, 1, rlen, true) |
+ brw_dp_read_desc(devinfo, brw_scratch_surface_idx(p),
+ BRW_DATAPORT_OWORD_BLOCK_DWORDS(num_regs * 8),
+ BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
+ BRW_DATAPORT_READ_TARGET_RENDER_CACHE));
}
}
const struct gen_device_info *devinfo = p->devinfo;
const unsigned target_cache =
(devinfo->gen >= 6 ? GEN6_SFID_DATAPORT_CONSTANT_CACHE :
- BRW_DATAPORT_READ_TARGET_DATA_CACHE);
- const unsigned exec_size = 1 << brw_inst_exec_size(devinfo, p->current);
+ BRW_SFID_DATAPORT_READ);
+ const unsigned exec_size = 1 << brw_get_default_exec_size(p);
/* On newer hardware, offset is in units of owords. */
if (devinfo->gen >= 6)
brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
/* set message header global offset field (reg 0, element 2) */
+ brw_set_default_exec_size(p, BRW_EXECUTE_1);
brw_MOV(p,
retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
mrf.nr,
brw_inst *insn = next_insn(p, BRW_OPCODE_SEND);
+ brw_inst_set_sfid(devinfo, insn, target_cache);
+
/* cast dest to a uword[8] vector */
dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
brw_inst_set_base_mrf(devinfo, insn, mrf.nr);
}
- brw_set_dp_read_message(p, insn, bind_table_index,
- BRW_DATAPORT_OWORD_BLOCK_DWORDS(exec_size),
- BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
- target_cache,
- 1, /* msg_length */
- true, /* header_present */
- DIV_ROUND_UP(exec_size, 8)); /* response_length */
+ brw_set_desc(p, insn,
+ brw_message_desc(devinfo, 1, DIV_ROUND_UP(exec_size, 8), true) |
+ brw_dp_read_desc(devinfo, bind_table_index,
+ BRW_DATAPORT_OWORD_BLOCK_DWORDS(exec_size),
+ BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
+ BRW_DATAPORT_READ_TARGET_DATA_CACHE));
brw_pop_insn_state(p);
}
-
-void brw_fb_WRITE(struct brw_codegen *p,
- struct brw_reg payload,
- struct brw_reg implied_header,
- unsigned msg_control,
- unsigned binding_table_index,
- unsigned msg_length,
- unsigned response_length,
- bool eot,
- bool last_render_target,
- bool header_present)
+brw_inst *
+brw_fb_WRITE(struct brw_codegen *p,
+ struct brw_reg payload,
+ struct brw_reg implied_header,
+ unsigned msg_control,
+ unsigned binding_table_index,
+ unsigned msg_length,
+ unsigned response_length,
+ bool eot,
+ bool last_render_target,
+ bool header_present)
{
const struct gen_device_info *devinfo = p->devinfo;
const unsigned target_cache =
(devinfo->gen >= 6 ? GEN6_SFID_DATAPORT_RENDER_CACHE :
- BRW_DATAPORT_READ_TARGET_RENDER_CACHE);
+ BRW_SFID_DATAPORT_WRITE);
brw_inst *insn;
unsigned msg_type;
struct brw_reg dest, src0;
- if (brw_inst_exec_size(devinfo, p->current) >= BRW_EXECUTE_16)
+ if (brw_get_default_exec_size(p) >= BRW_EXECUTE_16)
dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
else
dest = retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW);
} else {
insn = next_insn(p, BRW_OPCODE_SEND);
}
+ brw_inst_set_sfid(devinfo, insn, target_cache);
brw_inst_set_compression(devinfo, insn, false);
if (devinfo->gen >= 6) {
brw_set_dest(p, insn, dest);
brw_set_src0(p, insn, src0);
- brw_set_dp_write_message(p,
- insn,
- binding_table_index,
- msg_control,
- msg_type,
- target_cache,
- msg_length,
- header_present,
- last_render_target,
- response_length,
- eot,
- 0 /* send_commit_msg */);
+ brw_set_desc(p, insn,
+ brw_message_desc(devinfo, msg_length, response_length,
+ header_present) |
+ brw_dp_write_desc(devinfo, binding_table_index, msg_control,
+ msg_type, last_render_target,
+ 0 /* send_commit_msg */));
+ brw_inst_set_eot(devinfo, insn, eot);
+
+ return insn;
}
brw_inst *
const struct gen_device_info *devinfo = p->devinfo;
assert(devinfo->gen >= 9);
const unsigned msg_subtype =
- brw_inst_exec_size(devinfo, p->current) == BRW_EXECUTE_16 ? 0 : 1;
+ brw_get_default_exec_size(p) == BRW_EXECUTE_16 ? 0 : 1;
brw_inst *insn = next_insn(p, BRW_OPCODE_SENDC);
+ brw_inst_set_sfid(devinfo, insn, GEN6_SFID_DATAPORT_RENDER_CACHE);
brw_set_dest(p, insn, dst);
brw_set_src0(p, insn, payload);
- brw_set_dp_read_message(p, insn, binding_table_index,
- per_sample << 5 | msg_subtype,
- GEN9_DATAPORT_RC_RENDER_TARGET_READ,
- GEN6_SFID_DATAPORT_RENDER_CACHE,
- msg_length, true /* header_present */,
- response_length);
- brw_inst_set_rt_slot_group(devinfo, insn,
- brw_inst_qtr_control(devinfo, p->current) / 2);
+ brw_set_desc(
+ p, insn,
+ brw_message_desc(devinfo, msg_length, response_length, true) |
+ brw_dp_read_desc(devinfo, binding_table_index,
+ per_sample << 5 | msg_subtype,
+ GEN9_DATAPORT_RC_RENDER_TARGET_READ,
+ BRW_DATAPORT_READ_TARGET_RENDER_CACHE));
+ brw_inst_set_rt_slot_group(devinfo, insn, brw_get_default_group(p) / 16);
return insn;
}
gen6_resolve_implied_move(p, &src0, msg_reg_nr);
insn = next_insn(p, BRW_OPCODE_SEND);
+ brw_inst_set_sfid(devinfo, insn, BRW_SFID_SAMPLER);
brw_inst_set_pred_control(devinfo, insn, BRW_PREDICATE_NONE); /* XXX */
/* From the 965 PRM (volume 4, part 1, section 14.2.41):
brw_set_dest(p, insn, dest);
brw_set_src0(p, insn, src0);
- brw_set_sampler_message(p, insn,
- binding_table_index,
- sampler,
- msg_type,
- response_length,
- msg_length,
- header_present,
- simd_mode,
- return_format);
+ brw_set_desc(p, insn,
+ brw_message_desc(devinfo, msg_length, response_length,
+ header_present) |
+ brw_sampler_desc(devinfo, binding_table_index, sampler,
+ msg_type, simd_mode, return_format));
}
/* Adjust the message header's sampler state pointer to
brw_push_insn_state(p);
brw_set_default_access_mode(p, BRW_ALIGN_1);
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
+ brw_set_default_exec_size(p, BRW_EXECUTE_1);
brw_OR(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 5),
BRW_REGISTER_TYPE_UD),
retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD),
swizzle);
}
-struct brw_inst *
+void
brw_send_indirect_message(struct brw_codegen *p,
unsigned sfid,
struct brw_reg dst,
struct brw_reg payload,
- struct brw_reg desc)
+ struct brw_reg desc,
+ unsigned desc_imm,
+ bool eot)
{
const struct gen_device_info *devinfo = p->devinfo;
struct brw_inst *send;
- int setup;
dst = retype(dst, BRW_REGISTER_TYPE_UW);
assert(desc.type == BRW_REGISTER_TYPE_UD);
- /* We hold on to the setup instruction (the SEND in the direct case, the OR
- * in the indirect case) by its index in the instruction store. The
- * pointer returned by next_insn() may become invalid if emitting the SEND
- * in the indirect case reallocs the store.
- */
-
if (desc.file == BRW_IMMEDIATE_VALUE) {
- setup = p->nr_insn;
send = next_insn(p, BRW_OPCODE_SEND);
- brw_set_src1(p, send, desc);
-
+ brw_set_src0(p, send, retype(payload, BRW_REGISTER_TYPE_UD));
+ brw_set_desc(p, send, desc.ud | desc_imm);
} else {
struct brw_reg addr = retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD);
brw_push_insn_state(p);
brw_set_default_access_mode(p, BRW_ALIGN_1);
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
+ brw_set_default_exec_size(p, BRW_EXECUTE_1);
brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
/* Load the indirect descriptor to an address register using OR so the
- * caller can specify additional descriptor bits with the usual
- * brw_set_*_message() helper functions.
+ * caller can specify additional descriptor bits with the desc_imm
+ * immediate.
*/
- setup = p->nr_insn;
- brw_OR(p, addr, desc, brw_imm_ud(0));
+ brw_OR(p, addr, desc, brw_imm_ud(desc_imm));
brw_pop_insn_state(p);
send = next_insn(p, BRW_OPCODE_SEND);
- brw_set_src1(p, send, addr);
- }
+ brw_set_src0(p, send, retype(payload, BRW_REGISTER_TYPE_UD));
- if (dst.width < BRW_EXECUTE_8)
- brw_inst_set_exec_size(devinfo, send, dst.width);
+ if (devinfo->gen >= 12)
+ brw_inst_set_send_sel_reg32_desc(devinfo, send, true);
+ else
+ brw_set_src1(p, send, addr);
+ }
brw_set_dest(p, send, dst);
- brw_set_src0(p, send, retype(payload, BRW_REGISTER_TYPE_UD));
brw_inst_set_sfid(devinfo, send, sfid);
+ brw_inst_set_eot(devinfo, send, eot);
+}
+
+void
+brw_send_indirect_split_message(struct brw_codegen *p,
+ unsigned sfid,
+ struct brw_reg dst,
+ struct brw_reg payload0,
+ struct brw_reg payload1,
+ struct brw_reg desc,
+ unsigned desc_imm,
+ struct brw_reg ex_desc,
+ unsigned ex_desc_imm,
+ bool eot)
+{
+ const struct gen_device_info *devinfo = p->devinfo;
+ struct brw_inst *send;
+
+ dst = retype(dst, BRW_REGISTER_TYPE_UW);
+
+ assert(desc.type == BRW_REGISTER_TYPE_UD);
+
+ if (desc.file == BRW_IMMEDIATE_VALUE) {
+ desc.ud |= desc_imm;
+ } else {
+ struct brw_reg addr = retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD);
+
+ brw_push_insn_state(p);
+ brw_set_default_access_mode(p, BRW_ALIGN_1);
+ brw_set_default_mask_control(p, BRW_MASK_DISABLE);
+ brw_set_default_exec_size(p, BRW_EXECUTE_1);
+ brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
+
+ /* Load the indirect descriptor to an address register using OR so the
+ * caller can specify additional descriptor bits with the desc_imm
+ * immediate.
+ */
+ brw_OR(p, addr, desc, brw_imm_ud(desc_imm));
- return &p->store[setup];
+ brw_pop_insn_state(p);
+ desc = addr;
+ }
+
+ if (ex_desc.file == BRW_IMMEDIATE_VALUE &&
+ (ex_desc.ud & INTEL_MASK(15, 12)) == 0) {
+ ex_desc.ud |= ex_desc_imm;
+ } else {
+ struct brw_reg addr = retype(brw_address_reg(2), BRW_REGISTER_TYPE_UD);
+
+ brw_push_insn_state(p);
+ brw_set_default_access_mode(p, BRW_ALIGN_1);
+ brw_set_default_mask_control(p, BRW_MASK_DISABLE);
+ brw_set_default_exec_size(p, BRW_EXECUTE_1);
+ brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
+
+ /* Load the indirect extended descriptor to an address register using OR
+ * so the caller can specify additional descriptor bits with the
+ * desc_imm immediate.
+ *
+ * Even though the instruction dispatcher always pulls the SFID and EOT
+ * fields from the instruction itself, actual external unit which
+ * processes the message gets the SFID and EOT from the extended
+ * descriptor which comes from the address register. If we don't OR
+ * those two bits in, the external unit may get confused and hang.
+ */
+ unsigned imm_part = ex_desc_imm | sfid | eot << 5;
+
+ if (ex_desc.file == BRW_IMMEDIATE_VALUE) {
+ /* ex_desc bits 15:12 don't exist in the instruction encoding, so
+ * we may have fallen back to an indirect extended descriptor.
+ */
+ brw_MOV(p, addr, brw_imm_ud(ex_desc.ud | imm_part));
+ } else {
+ brw_OR(p, addr, ex_desc, brw_imm_ud(imm_part));
+ }
+
+ brw_pop_insn_state(p);
+ ex_desc = addr;
+ }
+
+ send = next_insn(p, devinfo->gen >= 12 ? BRW_OPCODE_SEND : BRW_OPCODE_SENDS);
+ brw_set_dest(p, send, dst);
+ brw_set_src0(p, send, retype(payload0, BRW_REGISTER_TYPE_UD));
+ brw_set_src1(p, send, retype(payload1, BRW_REGISTER_TYPE_UD));
+
+ if (desc.file == BRW_IMMEDIATE_VALUE) {
+ brw_inst_set_send_sel_reg32_desc(devinfo, send, 0);
+ brw_inst_set_send_desc(devinfo, send, desc.ud);
+ } else {
+ assert(desc.file == BRW_ARCHITECTURE_REGISTER_FILE);
+ assert(desc.nr == BRW_ARF_ADDRESS);
+ assert(desc.subnr == 0);
+ brw_inst_set_send_sel_reg32_desc(devinfo, send, 1);
+ }
+
+ if (ex_desc.file == BRW_IMMEDIATE_VALUE) {
+ brw_inst_set_send_sel_reg32_ex_desc(devinfo, send, 0);
+ brw_inst_set_sends_ex_desc(devinfo, send, ex_desc.ud);
+ } else {
+ assert(ex_desc.file == BRW_ARCHITECTURE_REGISTER_FILE);
+ assert(ex_desc.nr == BRW_ARF_ADDRESS);
+ assert((ex_desc.subnr & 0x3) == 0);
+ brw_inst_set_send_sel_reg32_ex_desc(devinfo, send, 1);
+ brw_inst_set_send_ex_desc_ia_subreg_nr(devinfo, send, ex_desc.subnr >> 2);
+ }
+
+ brw_inst_set_sfid(devinfo, send, sfid);
+ brw_inst_set_eot(devinfo, send, eot);
}
-static struct brw_inst *
+static void
brw_send_indirect_surface_message(struct brw_codegen *p,
unsigned sfid,
struct brw_reg dst,
struct brw_reg payload,
struct brw_reg surface,
- unsigned message_len,
- unsigned response_len,
- bool header_present)
+ unsigned desc_imm)
{
- const struct gen_device_info *devinfo = p->devinfo;
- struct brw_inst *insn;
-
if (surface.file != BRW_IMMEDIATE_VALUE) {
struct brw_reg addr = retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD);
brw_push_insn_state(p);
brw_set_default_access_mode(p, BRW_ALIGN_1);
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
+ brw_set_default_exec_size(p, BRW_EXECUTE_1);
brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
/* Mask out invalid bits from the surface index to avoid hangs e.g. when
* some surface array is accessed out of bounds.
*/
- insn = brw_AND(p, addr,
- suboffset(vec1(retype(surface, BRW_REGISTER_TYPE_UD)),
- BRW_GET_SWZ(surface.swizzle, 0)),
- brw_imm_ud(0xff));
+ brw_AND(p, addr,
+ suboffset(vec1(retype(surface, BRW_REGISTER_TYPE_UD)),
+ BRW_GET_SWZ(surface.swizzle, 0)),
+ brw_imm_ud(0xff));
brw_pop_insn_state(p);
surface = addr;
}
- insn = brw_send_indirect_message(p, sfid, dst, payload, surface);
- brw_inst_set_mlen(devinfo, insn, message_len);
- brw_inst_set_rlen(devinfo, insn, response_len);
- brw_inst_set_header_present(devinfo, insn, header_present);
-
- return insn;
+ brw_send_indirect_message(p, sfid, dst, payload, surface, desc_imm, false);
}
static bool
case BRW_OPCODE_HALT:
if (depth == 0)
return offset;
+ default:
+ break;
}
}
assert(brw_inst_uip(devinfo, insn) != 0);
assert(brw_inst_jip(devinfo, insn) != 0);
break;
+
+ default:
+ break;
}
}
}
const unsigned target_cache =
(devinfo->gen >= 7 ? GEN7_SFID_DATAPORT_DATA_CACHE :
devinfo->gen >= 6 ? GEN6_SFID_DATAPORT_RENDER_CACHE :
- BRW_DATAPORT_READ_TARGET_RENDER_CACHE);
+ BRW_SFID_DATAPORT_WRITE);
brw_inst *insn;
gen6_resolve_implied_move(p, &src0, msg_reg_nr);
insn = next_insn(p, BRW_OPCODE_SEND);
+ brw_inst_set_sfid(devinfo, insn, target_cache);
brw_set_dest(p, insn, dest);
brw_set_src0(p, insn, src0);
- brw_set_src1(p, insn, brw_imm_d(0));
- brw_set_dp_write_message(p, insn,
- binding_table_index,
- 0, /* msg_control: ignored */
- GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE,
- target_cache,
- 1, /* msg_length */
- true, /* header_present */
- 0, /* last_render_target: ignored */
- send_commit_msg, /* response_length */
- 0, /* end_of_thread */
- send_commit_msg); /* send_commit_msg */
+ brw_set_desc(p, insn,
+ brw_message_desc(devinfo, 1, send_commit_msg, true) |
+ brw_dp_write_desc(devinfo, binding_table_index,
+ 0, /* msg_control: ignored */
+ GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE,
+ 0, /* last_render_target: ignored */
+ send_commit_msg)); /* send_commit_msg */
}
static unsigned
brw_surface_payload_size(struct brw_codegen *p,
unsigned num_channels,
- bool has_simd4x2,
- bool has_simd16)
-{
- if (has_simd4x2 &&
- brw_inst_access_mode(p->devinfo, p->current) == BRW_ALIGN_16)
- return 1;
- else if (has_simd16 &&
- brw_inst_exec_size(p->devinfo, p->current) == BRW_EXECUTE_16)
- return 2 * num_channels;
- else
- return num_channels;
-}
-
-static void
-brw_set_dp_untyped_atomic_message(struct brw_codegen *p,
- brw_inst *insn,
- unsigned atomic_op,
- bool response_expected)
+ unsigned exec_size /**< 0 for SIMD4x2 */)
{
- const struct gen_device_info *devinfo = p->devinfo;
- unsigned msg_control =
- atomic_op | /* Atomic Operation Type: BRW_AOP_* */
- (response_expected ? 1 << 5 : 0); /* Return data expected */
-
- if (devinfo->gen >= 8 || devinfo->is_haswell) {
- if (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1) {
- if (brw_inst_exec_size(devinfo, p->current) != BRW_EXECUTE_16)
- msg_control |= 1 << 4; /* SIMD8 mode */
-
- brw_inst_set_dp_msg_type(devinfo, insn,
- HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP);
- } else {
- brw_inst_set_dp_msg_type(devinfo, insn,
- HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2);
- }
- } else {
- brw_inst_set_dp_msg_type(devinfo, insn,
- GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP);
-
- if (brw_inst_exec_size(devinfo, p->current) != BRW_EXECUTE_16)
- msg_control |= 1 << 4; /* SIMD8 mode */
- }
-
- brw_inst_set_dp_msg_control(devinfo, insn, msg_control);
+ if (exec_size == 0)
+ return 1; /* SIMD4x2 */
+ else if (exec_size <= 8)
+ return num_channels;
+ else
+ return 2 * num_channels;
}
void
struct brw_reg surface,
unsigned atomic_op,
unsigned msg_length,
- bool response_expected)
+ bool response_expected,
+ bool header_present)
{
const struct gen_device_info *devinfo = p->devinfo;
const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ?
HSW_SFID_DATAPORT_DATA_CACHE_1 :
GEN7_SFID_DATAPORT_DATA_CACHE);
- const bool align1 = brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1;
+ const bool align1 = brw_get_default_access_mode(p) == BRW_ALIGN_1;
+ /* SIMD4x2 untyped atomic instructions only exist on HSW+ */
+ const bool has_simd4x2 = devinfo->gen >= 8 || devinfo->is_haswell;
+ const unsigned exec_size = align1 ? 1 << brw_get_default_exec_size(p) :
+ has_simd4x2 ? 0 : 8;
+ const unsigned response_length =
+ brw_surface_payload_size(p, response_expected, exec_size);
+ const unsigned desc =
+ brw_message_desc(devinfo, msg_length, response_length, header_present) |
+ brw_dp_untyped_atomic_desc(devinfo, exec_size, atomic_op,
+ response_expected);
/* Mask out unused components -- This is especially important in Align16
* mode on generations that don't have native support for SIMD4x2 atomics,
* because unused but enabled components will cause the dataport to perform
* uninitialized Y, Z and W coordinates of the payload.
*/
const unsigned mask = align1 ? WRITEMASK_XYZW : WRITEMASK_X;
- struct brw_inst *insn = brw_send_indirect_surface_message(
- p, sfid, brw_writemask(dst, mask), payload, surface, msg_length,
- brw_surface_payload_size(p, response_expected,
- devinfo->gen >= 8 || devinfo->is_haswell, true),
- align1);
- brw_set_dp_untyped_atomic_message(
- p, insn, atomic_op, response_expected);
-}
-
-static void
-brw_set_dp_untyped_surface_read_message(struct brw_codegen *p,
- struct brw_inst *insn,
- unsigned num_channels)
-{
- const struct gen_device_info *devinfo = p->devinfo;
- /* Set mask of 32-bit channels to drop. */
- unsigned msg_control = 0xf & (0xf << num_channels);
-
- if (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1) {
- if (brw_inst_exec_size(devinfo, p->current) == BRW_EXECUTE_16)
- msg_control |= 1 << 4; /* SIMD16 mode */
- else
- msg_control |= 2 << 4; /* SIMD8 mode */
- }
-
- brw_inst_set_dp_msg_type(devinfo, insn,
- (devinfo->gen >= 8 || devinfo->is_haswell ?
- HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ :
- GEN7_DATAPORT_DC_UNTYPED_SURFACE_READ));
- brw_inst_set_dp_msg_control(devinfo, insn, msg_control);
+ brw_send_indirect_surface_message(p, sfid, brw_writemask(dst, mask),
+ payload, surface, desc);
}
void
const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ?
HSW_SFID_DATAPORT_DATA_CACHE_1 :
GEN7_SFID_DATAPORT_DATA_CACHE);
- struct brw_inst *insn = brw_send_indirect_surface_message(
- p, sfid, dst, payload, surface, msg_length,
- brw_surface_payload_size(p, num_channels, true, true),
- false);
+ const bool align1 = brw_get_default_access_mode(p) == BRW_ALIGN_1;
+ const unsigned exec_size = align1 ? 1 << brw_get_default_exec_size(p) : 0;
+ const unsigned response_length =
+ brw_surface_payload_size(p, num_channels, exec_size);
+ const unsigned desc =
+ brw_message_desc(devinfo, msg_length, response_length, false) |
+ brw_dp_untyped_surface_rw_desc(devinfo, exec_size, num_channels, false);
- brw_set_dp_untyped_surface_read_message(
- p, insn, num_channels);
-}
-
-static void
-brw_set_dp_untyped_surface_write_message(struct brw_codegen *p,
- struct brw_inst *insn,
- unsigned num_channels)
-{
- const struct gen_device_info *devinfo = p->devinfo;
- /* Set mask of 32-bit channels to drop. */
- unsigned msg_control = 0xf & (0xf << num_channels);
-
- if (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1) {
- if (brw_inst_exec_size(devinfo, p->current) == BRW_EXECUTE_16)
- msg_control |= 1 << 4; /* SIMD16 mode */
- else
- msg_control |= 2 << 4; /* SIMD8 mode */
- } else {
- if (devinfo->gen >= 8 || devinfo->is_haswell)
- msg_control |= 0 << 4; /* SIMD4x2 mode */
- else
- msg_control |= 2 << 4; /* SIMD8 mode */
- }
-
- brw_inst_set_dp_msg_type(devinfo, insn,
- devinfo->gen >= 8 || devinfo->is_haswell ?
- HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_WRITE :
- GEN7_DATAPORT_DC_UNTYPED_SURFACE_WRITE);
- brw_inst_set_dp_msg_control(devinfo, insn, msg_control);
+ brw_send_indirect_surface_message(p, sfid, dst, payload, surface, desc);
}
void
struct brw_reg payload,
struct brw_reg surface,
unsigned msg_length,
- unsigned num_channels)
+ unsigned num_channels,
+ bool header_present)
{
const struct gen_device_info *devinfo = p->devinfo;
const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ?
HSW_SFID_DATAPORT_DATA_CACHE_1 :
GEN7_SFID_DATAPORT_DATA_CACHE);
- const bool align1 = brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1;
- /* Mask out unused components -- See comment in brw_untyped_atomic(). */
- const unsigned mask = devinfo->gen == 7 && !devinfo->is_haswell && !align1 ?
- WRITEMASK_X : WRITEMASK_XYZW;
- struct brw_inst *insn = brw_send_indirect_surface_message(
- p, sfid, brw_writemask(brw_null_reg(), mask),
- payload, surface, msg_length, 0, align1);
-
- brw_set_dp_untyped_surface_write_message(
- p, insn, num_channels);
-}
-
-static void
-brw_set_dp_typed_atomic_message(struct brw_codegen *p,
- struct brw_inst *insn,
- unsigned atomic_op,
- bool response_expected)
-{
- const struct gen_device_info *devinfo = p->devinfo;
- unsigned msg_control =
- atomic_op | /* Atomic Operation Type: BRW_AOP_* */
- (response_expected ? 1 << 5 : 0); /* Return data expected */
-
- if (devinfo->gen >= 8 || devinfo->is_haswell) {
- if (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1) {
- if (brw_inst_qtr_control(devinfo, p->current) % 2 == 1)
- msg_control |= 1 << 4; /* Use high 8 slots of the sample mask */
-
- brw_inst_set_dp_msg_type(devinfo, insn,
- HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP);
- } else {
- brw_inst_set_dp_msg_type(devinfo, insn,
- HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP_SIMD4X2);
- }
-
- } else {
- brw_inst_set_dp_msg_type(devinfo, insn,
- GEN7_DATAPORT_RC_TYPED_ATOMIC_OP);
-
- if (brw_inst_qtr_control(devinfo, p->current) % 2 == 1)
- msg_control |= 1 << 4; /* Use high 8 slots of the sample mask */
- }
-
- brw_inst_set_dp_msg_control(devinfo, insn, msg_control);
-}
-
-void
-brw_typed_atomic(struct brw_codegen *p,
- struct brw_reg dst,
- struct brw_reg payload,
- struct brw_reg surface,
- unsigned atomic_op,
- unsigned msg_length,
- bool response_expected) {
- const struct gen_device_info *devinfo = p->devinfo;
- const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ?
- HSW_SFID_DATAPORT_DATA_CACHE_1 :
- GEN6_SFID_DATAPORT_RENDER_CACHE);
- const bool align1 = (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1);
- /* Mask out unused components -- See comment in brw_untyped_atomic(). */
- const unsigned mask = align1 ? WRITEMASK_XYZW : WRITEMASK_X;
- struct brw_inst *insn = brw_send_indirect_surface_message(
- p, sfid, brw_writemask(dst, mask), payload, surface, msg_length,
- brw_surface_payload_size(p, response_expected,
- devinfo->gen >= 8 || devinfo->is_haswell, false),
- true);
-
- brw_set_dp_typed_atomic_message(
- p, insn, atomic_op, response_expected);
-}
-
-static void
-brw_set_dp_typed_surface_read_message(struct brw_codegen *p,
- struct brw_inst *insn,
- unsigned num_channels)
-{
- const struct gen_device_info *devinfo = p->devinfo;
- /* Set mask of unused channels. */
- unsigned msg_control = 0xf & (0xf << num_channels);
-
- if (devinfo->gen >= 8 || devinfo->is_haswell) {
- if (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1) {
- if (brw_inst_qtr_control(devinfo, p->current) % 2 == 1)
- msg_control |= 2 << 4; /* Use high 8 slots of the sample mask */
- else
- msg_control |= 1 << 4; /* Use low 8 slots of the sample mask */
- }
-
- brw_inst_set_dp_msg_type(devinfo, insn,
- HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_READ);
- } else {
- if (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1) {
- if (brw_inst_qtr_control(devinfo, p->current) % 2 == 1)
- msg_control |= 1 << 5; /* Use high 8 slots of the sample mask */
- }
-
- brw_inst_set_dp_msg_type(devinfo, insn,
- GEN7_DATAPORT_RC_TYPED_SURFACE_READ);
- }
-
- brw_inst_set_dp_msg_control(devinfo, insn, msg_control);
-}
-
-void
-brw_typed_surface_read(struct brw_codegen *p,
- struct brw_reg dst,
- struct brw_reg payload,
- struct brw_reg surface,
- unsigned msg_length,
- unsigned num_channels)
-{
- const struct gen_device_info *devinfo = p->devinfo;
- const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ?
- HSW_SFID_DATAPORT_DATA_CACHE_1 :
- GEN6_SFID_DATAPORT_RENDER_CACHE);
- struct brw_inst *insn = brw_send_indirect_surface_message(
- p, sfid, dst, payload, surface, msg_length,
- brw_surface_payload_size(p, num_channels,
- devinfo->gen >= 8 || devinfo->is_haswell, false),
- true);
-
- brw_set_dp_typed_surface_read_message(
- p, insn, num_channels);
-}
-
-static void
-brw_set_dp_typed_surface_write_message(struct brw_codegen *p,
- struct brw_inst *insn,
- unsigned num_channels)
-{
- const struct gen_device_info *devinfo = p->devinfo;
- /* Set mask of unused channels. */
- unsigned msg_control = 0xf & (0xf << num_channels);
-
- if (devinfo->gen >= 8 || devinfo->is_haswell) {
- if (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1) {
- if (brw_inst_qtr_control(devinfo, p->current) % 2 == 1)
- msg_control |= 2 << 4; /* Use high 8 slots of the sample mask */
- else
- msg_control |= 1 << 4; /* Use low 8 slots of the sample mask */
- }
-
- brw_inst_set_dp_msg_type(devinfo, insn,
- HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_WRITE);
-
- } else {
- if (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1) {
- if (brw_inst_qtr_control(devinfo, p->current) % 2 == 1)
- msg_control |= 1 << 5; /* Use high 8 slots of the sample mask */
- }
-
- brw_inst_set_dp_msg_type(devinfo, insn,
- GEN7_DATAPORT_RC_TYPED_SURFACE_WRITE);
- }
-
- brw_inst_set_dp_msg_control(devinfo, insn, msg_control);
-}
-
-void
-brw_typed_surface_write(struct brw_codegen *p,
- struct brw_reg payload,
- struct brw_reg surface,
- unsigned msg_length,
- unsigned num_channels)
-{
- const struct gen_device_info *devinfo = p->devinfo;
- const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ?
- HSW_SFID_DATAPORT_DATA_CACHE_1 :
- GEN6_SFID_DATAPORT_RENDER_CACHE);
- const bool align1 = (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1);
+ const bool align1 = brw_get_default_access_mode(p) == BRW_ALIGN_1;
+ /* SIMD4x2 untyped surface write instructions only exist on HSW+ */
+ const bool has_simd4x2 = devinfo->gen >= 8 || devinfo->is_haswell;
+ const unsigned exec_size = align1 ? 1 << brw_get_default_exec_size(p) :
+ has_simd4x2 ? 0 : 8;
+ const unsigned desc =
+ brw_message_desc(devinfo, msg_length, 0, header_present) |
+ brw_dp_untyped_surface_rw_desc(devinfo, exec_size, num_channels, true);
/* Mask out unused components -- See comment in brw_untyped_atomic(). */
- const unsigned mask = (devinfo->gen == 7 && !devinfo->is_haswell && !align1 ?
- WRITEMASK_X : WRITEMASK_XYZW);
- struct brw_inst *insn = brw_send_indirect_surface_message(
- p, sfid, brw_writemask(brw_null_reg(), mask),
- payload, surface, msg_length, 0, true);
+ const unsigned mask = !has_simd4x2 && !align1 ? WRITEMASK_X : WRITEMASK_XYZW;
- brw_set_dp_typed_surface_write_message(
- p, insn, num_channels);
+ brw_send_indirect_surface_message(p, sfid, brw_writemask(brw_null_reg(), mask),
+ payload, surface, desc);
}
static void
brw_set_memory_fence_message(struct brw_codegen *p,
struct brw_inst *insn,
enum brw_message_target sfid,
- bool commit_enable)
+ bool commit_enable,
+ unsigned bti)
{
const struct gen_device_info *devinfo = p->devinfo;
- brw_set_message_descriptor(p, insn, sfid,
- 1 /* message length */,
- (commit_enable ? 1 : 0) /* response length */,
- true /* header present */,
- false);
+ brw_set_desc(p, insn, brw_message_desc(
+ devinfo, 1, (commit_enable ? 1 : 0), true));
+
+ brw_inst_set_sfid(devinfo, insn, sfid);
switch (sfid) {
case GEN6_SFID_DATAPORT_RENDER_CACHE:
if (commit_enable)
brw_inst_set_dp_msg_control(devinfo, insn, 1 << 5);
+
+ assert(devinfo->gen >= 11 || bti == 0);
+ brw_inst_set_binding_table_index(devinfo, insn, bti);
}
void
brw_memory_fence(struct brw_codegen *p,
- struct brw_reg dst)
+ struct brw_reg dst,
+ struct brw_reg src,
+ enum opcode send_op,
+ bool stall,
+ unsigned bti)
{
const struct gen_device_info *devinfo = p->devinfo;
- const bool commit_enable = devinfo->gen == 7 && !devinfo->is_haswell;
+ const bool commit_enable = stall ||
+ devinfo->gen >= 10 || /* HSD ES # 1404612949 */
+ (devinfo->gen == 7 && !devinfo->is_haswell);
struct brw_inst *insn;
brw_push_insn_state(p);
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
brw_set_default_exec_size(p, BRW_EXECUTE_1);
- dst = vec1(dst);
+ dst = retype(vec1(dst), BRW_REGISTER_TYPE_UW);
+ src = retype(vec1(src), BRW_REGISTER_TYPE_UD);
/* Set dst as destination for dependency tracking, the MEMORY_FENCE
* message doesn't write anything back.
*/
- insn = next_insn(p, BRW_OPCODE_SEND);
- dst = retype(dst, BRW_REGISTER_TYPE_UW);
+ insn = next_insn(p, send_op);
brw_set_dest(p, insn, dst);
- brw_set_src0(p, insn, dst);
+ brw_set_src0(p, insn, src);
brw_set_memory_fence_message(p, insn, GEN7_SFID_DATAPORT_DATA_CACHE,
- commit_enable);
+ commit_enable, bti);
if (devinfo->gen == 7 && !devinfo->is_haswell) {
/* IVB does typed surface access through the render cache, so we need to
* flush it too. Use a different register so both flushes can be
* pipelined by the hardware.
*/
- insn = next_insn(p, BRW_OPCODE_SEND);
+ insn = next_insn(p, send_op);
brw_set_dest(p, insn, offset(dst, 1));
- brw_set_src0(p, insn, offset(dst, 1));
+ brw_set_src0(p, insn, src);
brw_set_memory_fence_message(p, insn, GEN6_SFID_DATAPORT_RENDER_CACHE,
- commit_enable);
+ commit_enable, bti);
/* Now write the response of the second message into the response of the
* first to trigger a pipeline stall -- This way future render and data
brw_MOV(p, dst, offset(dst, 1));
}
+ if (stall)
+ brw_MOV(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UW), dst);
+
brw_pop_insn_state(p);
}
unsigned response_length)
{
const struct gen_device_info *devinfo = p->devinfo;
- struct brw_inst *insn;
- const uint16_t exec_size = brw_inst_exec_size(devinfo, p->current);
+ const uint16_t exec_size = brw_get_default_exec_size(p);
+ const unsigned slot_group = brw_get_default_group(p) / 16;
+ const unsigned simd_mode = (exec_size == BRW_EXECUTE_16);
+ const unsigned desc =
+ brw_message_desc(devinfo, msg_length, response_length, false) |
+ brw_pixel_interp_desc(devinfo, mode, noperspective, simd_mode,
+ slot_group);
/* brw_send_indirect_message will automatically use a direct send message
* if data is actually immediate.
*/
- insn = brw_send_indirect_message(p,
- GEN7_SFID_PIXEL_INTERPOLATOR,
- dest,
- mrf,
- vec1(data));
- brw_inst_set_mlen(devinfo, insn, msg_length);
- brw_inst_set_rlen(devinfo, insn, response_length);
-
- brw_inst_set_pi_simd_mode(devinfo, insn, exec_size == BRW_EXECUTE_16);
- brw_inst_set_pi_slot_group(devinfo, insn, 0); /* zero unless 32/64px dispatch */
- brw_inst_set_pi_nopersp(devinfo, insn, noperspective);
- brw_inst_set_pi_message_type(devinfo, insn, mode);
+ brw_send_indirect_message(p,
+ GEN7_SFID_PIXEL_INTERPOLATOR,
+ dest,
+ mrf,
+ vec1(data),
+ desc,
+ false);
}
void
struct brw_reg mask)
{
const struct gen_device_info *devinfo = p->devinfo;
- const unsigned exec_size = 1 << brw_inst_exec_size(devinfo, p->current);
- const unsigned qtr_control = brw_inst_qtr_control(devinfo, p->current);
+ const unsigned exec_size = 1 << brw_get_default_exec_size(p);
+ const unsigned qtr_control = brw_get_default_group(p) / 8;
brw_inst *inst;
assert(devinfo->gen >= 7);
brw_push_insn_state(p);
- if (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1) {
+ /* The flag register is only used on Gen7 in align1 mode, so avoid setting
+ * unnecessary bits in the instruction words, get the information we need
+ * and reset the default flag register. This allows more instructions to be
+ * compacted.
+ */
+ const unsigned flag_subreg = p->current->flag_subreg;
+ brw_set_default_flag_reg(p, 0, 0);
+
+ if (brw_get_default_access_mode(p) == BRW_ALIGN_1) {
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
if (devinfo->gen >= 8) {
struct brw_reg exec_mask =
retype(brw_mask_reg(0), BRW_REGISTER_TYPE_UD);
+ brw_set_default_exec_size(p, BRW_EXECUTE_1);
if (mask.file != BRW_IMMEDIATE_VALUE || mask.ud != 0xffffffff) {
/* Unfortunately, ce0 does not take into account the thread
* dispatch mask, which may be a problem in cases where it's not
*/
inst = brw_FBL(p, vec1(dst), exec_mask);
} else {
- const struct brw_reg flag = brw_flag_reg(1, 0);
+ const struct brw_reg flag = brw_flag_subreg(flag_subreg);
+ brw_set_default_exec_size(p, BRW_EXECUTE_1);
brw_MOV(p, retype(flag, BRW_REGISTER_TYPE_UD), brw_imm_ud(0));
/* Run enough instructions returning zero with execution masking and
brw_inst_set_mask_control(devinfo, inst, BRW_MASK_ENABLE);
brw_inst_set_group(devinfo, inst, lower_size * i + 8 * qtr_control);
brw_inst_set_cond_modifier(devinfo, inst, BRW_CONDITIONAL_Z);
- brw_inst_set_flag_reg_nr(devinfo, inst, 1);
brw_inst_set_exec_size(devinfo, inst, cvt(lower_size) - 1);
+ brw_inst_set_flag_reg_nr(devinfo, inst, flag_subreg / 2);
+ brw_inst_set_flag_subreg_nr(devinfo, inst, flag_subreg % 2);
}
/* Find the first bit set in the exec_size-wide portion of the flag
* instructions.
*/
const enum brw_reg_type type = brw_int_type(exec_size / 8, false);
+ brw_set_default_exec_size(p, BRW_EXECUTE_1);
brw_FBL(p, vec1(dst), byte_offset(retype(flag, type), qtr_control));
}
} else {
struct brw_reg idx)
{
const struct gen_device_info *devinfo = p->devinfo;
- const bool align1 = brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1;
+ const bool align1 = brw_get_default_access_mode(p) == BRW_ALIGN_1;
brw_inst *inst;
brw_push_insn_state(p);
assert(src.file == BRW_GENERAL_REGISTER_FILE &&
src.address_mode == BRW_ADDRESS_DIRECT);
+ assert(!src.abs && !src.negate);
+ assert(src.type == dst.type);
if ((src.vstride == 0 && (src.hstride == 0 || !align1)) ||
idx.file == BRW_IMMEDIATE_VALUE) {
(align1 ? stride(suboffset(src, i), 0, 1, 0) :
stride(suboffset(src, 4 * i), 0, 4, 1)));
} else {
+ /* From the Haswell PRM section "Register Region Restrictions":
+ *
+ * "The lower bits of the AddressImmediate must not overflow to
+ * change the register address. The lower 5 bits of Address
+ * Immediate when added to lower 5 bits of address register gives
+ * the sub-register offset. The upper bits of Address Immediate
+ * when added to upper bits of address register gives the register
+ * address. Any overflow from sub-register offset is dropped."
+ *
+ * Fortunately, for broadcast, we never have a sub-register offset so
+ * this isn't an issue.
+ */
+ assert(src.subnr == 0);
+
if (align1) {
const struct brw_reg addr =
retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD);
- const unsigned offset = src.nr * REG_SIZE + src.subnr;
+ unsigned offset = src.nr * REG_SIZE + src.subnr;
/* Limit in bytes of the signed indirect addressing immediate. */
const unsigned limit = 512;
* addressing immediate, account for the difference if the source
* register is above this limit.
*/
- if (offset >= limit)
+ if (offset >= limit) {
brw_ADD(p, addr, addr, brw_imm_ud(offset - offset % limit));
+ offset = offset % limit;
+ }
brw_pop_insn_state(p);
/* Use indirect addressing to fetch the specified component. */
- brw_MOV(p, dst,
- retype(brw_vec1_indirect(addr.subnr, offset % limit),
- src.type));
+ if (type_sz(src.type) > 4 &&
+ (devinfo->is_cherryview || gen_device_info_is_9lp(devinfo))) {
+ /* From the Cherryview PRM Vol 7. "Register Region Restrictions":
+ *
+ * "When source or destination datatype is 64b or operation is
+ * integer DWord multiply, indirect addressing must not be
+ * used."
+ *
+ * To work around both of this issue, we do two integer MOVs
+ * insead of one 64-bit MOV. Because no double value should ever
+ * cross a register boundary, it's safe to use the immediate
+ * offset in the indirect here to handle adding 4 bytes to the
+ * offset and avoid the extra ADD to the register file.
+ */
+ brw_MOV(p, subscript(dst, BRW_REGISTER_TYPE_D, 0),
+ retype(brw_vec1_indirect(addr.subnr, offset),
+ BRW_REGISTER_TYPE_D));
+ brw_MOV(p, subscript(dst, BRW_REGISTER_TYPE_D, 1),
+ retype(brw_vec1_indirect(addr.subnr, offset + 4),
+ BRW_REGISTER_TYPE_D));
+ } else {
+ brw_MOV(p, dst,
+ retype(brw_vec1_indirect(addr.subnr, offset), src.type));
+ }
} else {
/* In SIMD4x2 mode the index can be either zero or one, replicate it
* to all bits of a flag register,
struct brw_reg payload,
uint32_t surf_index)
{
- const unsigned sfid = (p->devinfo->gen >= 8 || p->devinfo->is_haswell ?
+ const struct gen_device_info *devinfo = p->devinfo;
+ const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ?
HSW_SFID_DATAPORT_DATA_CACHE_1 :
GEN7_SFID_DATAPORT_DATA_CACHE);
- assert(p->devinfo->gen >= 7);
+ assert(devinfo->gen >= 7);
brw_push_insn_state(p);
brw_set_default_access_mode(p, BRW_ALIGN_1);
BRW_ARF_NULL, 0));
brw_set_src0(p, send, brw_vec1_reg(payload.file,
payload.nr, 0));
- brw_set_src1(p, send, brw_imm_ud(0));
- brw_set_message_descriptor(p, send, sfid, 2, 0, false, false);
- brw_inst_set_binding_table_index(p->devinfo, send, surf_index);
- brw_set_dp_untyped_atomic_message(p, send, BRW_AOP_ADD, false);
+ brw_set_desc(p, send, (brw_message_desc(devinfo, 2, 0, false) |
+ brw_dp_untyped_atomic_desc(devinfo, 1, BRW_AOP_ADD,
+ false)));
+
+ brw_inst_set_sfid(devinfo, send, sfid);
+ brw_inst_set_binding_table_index(devinfo, send, surf_index);
brw_pop_insn_state(p);
}
brw_set_dest(p, inst, retype(brw_null_reg(), BRW_REGISTER_TYPE_UW));
brw_set_src0(p, inst, src);
brw_set_src1(p, inst, brw_null_reg());
+ brw_set_desc(p, inst, brw_message_desc(devinfo, 1, 0, false));
- brw_set_message_descriptor(p, inst, BRW_SFID_MESSAGE_GATEWAY,
- 1 /* msg_length */,
- 0 /* response_length */,
- false /* header_present */,
- false /* end_of_thread */);
-
+ brw_inst_set_sfid(devinfo, inst, BRW_SFID_MESSAGE_GATEWAY);
brw_inst_set_gateway_notify(devinfo, inst, 1);
brw_inst_set_gateway_subfuncid(devinfo, inst,
BRW_MESSAGE_GATEWAY_SFID_BARRIER_MSG);
brw_inst_set_exec_size(devinfo, insn, BRW_EXECUTE_1);
brw_inst_set_mask_control(devinfo, insn, BRW_MASK_DISABLE);
}
+
+void
+brw_float_controls_mode(struct brw_codegen *p,
+ unsigned mode, unsigned mask)
+{
+ brw_inst *inst = brw_AND(p, brw_cr0_reg(0), brw_cr0_reg(0),
+ brw_imm_ud(~mask));
+ brw_inst_set_exec_size(p->devinfo, inst, BRW_EXECUTE_1);
+
+ /* From the Skylake PRM, Volume 7, page 760:
+ * "Implementation Restriction on Register Access: When the control
+ * register is used as an explicit source and/or destination, hardware
+ * does not ensure execution pipeline coherency. Software must set the
+ * thread control field to ‘switch’ for an instruction that uses
+ * control register as an explicit operand."
+ */
+ if (p->devinfo->gen < 12)
+ brw_inst_set_thread_control(p->devinfo, inst, BRW_THREAD_SWITCH);
+
+ if (mode) {
+ brw_inst *inst_or = brw_OR(p, brw_cr0_reg(0), brw_cr0_reg(0),
+ brw_imm_ud(mode));
+ brw_inst_set_exec_size(p->devinfo, inst_or, BRW_EXECUTE_1);
+ if (p->devinfo->gen < 12)
+ brw_inst_set_thread_control(p->devinfo, inst_or, BRW_THREAD_SWITCH);
+ }
+}