else if (dest.file == BRW_GENERAL_REGISTER_FILE)
assert(dest.nr < 128);
- gen7_convert_mrf_to_grf(p, &dest);
+ /* The hardware has a restriction where if the destination is Byte,
+ * the instruction needs to have a stride of 2 (except for packed byte
+ * MOV). This seems to be required even if the destination is the NULL
+ * register.
+ */
+ if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
+ dest.nr == BRW_ARF_NULL &&
+ type_sz(dest.type) == 1) {
+ dest.hstride = BRW_HORIZONTAL_STRIDE_2;
+ }
- brw_inst_set_dst_file_type(devinfo, inst, dest.file, dest.type);
- brw_inst_set_dst_address_mode(devinfo, inst, dest.address_mode);
+ gen7_convert_mrf_to_grf(p, &dest);
- if (dest.address_mode == BRW_ADDRESS_DIRECT) {
+ if (devinfo->gen >= 12 &&
+ (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND ||
+ brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDC)) {
+ assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
+ dest.file == BRW_ARCHITECTURE_REGISTER_FILE);
+ assert(dest.address_mode == BRW_ADDRESS_DIRECT);
+ assert(dest.subnr == 0);
+ assert(brw_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1 ||
+ (dest.hstride == BRW_HORIZONTAL_STRIDE_1 &&
+ dest.vstride == dest.width + 1));
+ assert(!dest.negate && !dest.abs);
+ brw_inst_set_dst_reg_file(devinfo, inst, dest.file);
brw_inst_set_dst_da_reg_nr(devinfo, inst, dest.nr);
- if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
- brw_inst_set_dst_da1_subreg_nr(devinfo, inst, dest.subnr);
- if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
- dest.hstride = BRW_HORIZONTAL_STRIDE_1;
- brw_inst_set_dst_hstride(devinfo, inst, dest.hstride);
- } else {
- brw_inst_set_dst_da16_subreg_nr(devinfo, inst, dest.subnr / 16);
- brw_inst_set_da16_writemask(devinfo, inst, dest.writemask);
- if (dest.file == BRW_GENERAL_REGISTER_FILE ||
- dest.file == BRW_MESSAGE_REGISTER_FILE) {
- assert(dest.writemask != 0);
- }
- /* From the Ivybridge PRM, Vol 4, Part 3, Section 5.2.4.1:
- * Although Dst.HorzStride is a don't care for Align16, HW needs
- * this to be programmed as "01".
- */
- brw_inst_set_dst_hstride(devinfo, inst, 1);
- }
+ } else if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDS ||
+ brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDSC) {
+ assert(devinfo->gen < 12);
+ assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
+ dest.file == BRW_ARCHITECTURE_REGISTER_FILE);
+ assert(dest.address_mode == BRW_ADDRESS_DIRECT);
+ assert(dest.subnr % 16 == 0);
+ assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1 &&
+ dest.vstride == dest.width + 1);
+ assert(!dest.negate && !dest.abs);
+ brw_inst_set_dst_da_reg_nr(devinfo, inst, dest.nr);
+ brw_inst_set_dst_da16_subreg_nr(devinfo, inst, dest.subnr / 16);
+ brw_inst_set_send_dst_reg_file(devinfo, inst, dest.file);
} else {
- brw_inst_set_dst_ia_subreg_nr(devinfo, inst, dest.subnr);
+ brw_inst_set_dst_file_type(devinfo, inst, dest.file, dest.type);
+ brw_inst_set_dst_address_mode(devinfo, inst, dest.address_mode);
- /* These are different sizes in align1 vs align16:
- */
- if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
- brw_inst_set_dst_ia1_addr_imm(devinfo, inst,
- dest.indirect_offset);
- if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
- dest.hstride = BRW_HORIZONTAL_STRIDE_1;
- brw_inst_set_dst_hstride(devinfo, inst, dest.hstride);
+ if (dest.address_mode == BRW_ADDRESS_DIRECT) {
+ brw_inst_set_dst_da_reg_nr(devinfo, inst, dest.nr);
+
+ if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
+ brw_inst_set_dst_da1_subreg_nr(devinfo, inst, dest.subnr);
+ if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
+ dest.hstride = BRW_HORIZONTAL_STRIDE_1;
+ brw_inst_set_dst_hstride(devinfo, inst, dest.hstride);
+ } else {
+ brw_inst_set_dst_da16_subreg_nr(devinfo, inst, dest.subnr / 16);
+ brw_inst_set_da16_writemask(devinfo, inst, dest.writemask);
+ if (dest.file == BRW_GENERAL_REGISTER_FILE ||
+ dest.file == BRW_MESSAGE_REGISTER_FILE) {
+ assert(dest.writemask != 0);
+ }
+ /* From the Ivybridge PRM, Vol 4, Part 3, Section 5.2.4.1:
+ * Although Dst.HorzStride is a don't care for Align16, HW needs
+ * this to be programmed as "01".
+ */
+ brw_inst_set_dst_hstride(devinfo, inst, 1);
+ }
} else {
- brw_inst_set_dst_ia16_addr_imm(devinfo, inst,
- dest.indirect_offset);
- /* even ignored in da16, still need to set as '01' */
- brw_inst_set_dst_hstride(devinfo, inst, 1);
+ brw_inst_set_dst_ia_subreg_nr(devinfo, inst, dest.subnr);
+
+ /* These are different sizes in align1 vs align16:
+ */
+ if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
+ brw_inst_set_dst_ia1_addr_imm(devinfo, inst,
+ dest.indirect_offset);
+ if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
+ dest.hstride = BRW_HORIZONTAL_STRIDE_1;
+ brw_inst_set_dst_hstride(devinfo, inst, dest.hstride);
+ } else {
+ brw_inst_set_dst_ia16_addr_imm(devinfo, inst,
+ dest.indirect_offset);
+ /* even ignored in da16, still need to set as '01' */
+ brw_inst_set_dst_hstride(devinfo, inst, 1);
+ }
}
}
gen7_convert_mrf_to_grf(p, &reg);
- if (devinfo->gen >= 6 && (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND ||
- brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDC)) {
+ if (devinfo->gen >= 6 &&
+ (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND ||
+ brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDC ||
+ brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDS ||
+ brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDSC)) {
/* Any source modifiers or regions will be ignored, since this just
* identifies the MRF/GRF to start reading the message contents from.
* Check for some likely failures.
assert(reg.address_mode == BRW_ADDRESS_DIRECT);
}
- brw_inst_set_src0_file_type(devinfo, inst, reg.file, reg.type);
- brw_inst_set_src0_abs(devinfo, inst, reg.abs);
- brw_inst_set_src0_negate(devinfo, inst, reg.negate);
- brw_inst_set_src0_address_mode(devinfo, inst, reg.address_mode);
-
- if (reg.file == BRW_IMMEDIATE_VALUE) {
- if (reg.type == BRW_REGISTER_TYPE_DF ||
- brw_inst_opcode(devinfo, inst) == BRW_OPCODE_DIM)
- brw_inst_set_imm_df(devinfo, inst, reg.df);
- else if (reg.type == BRW_REGISTER_TYPE_UQ ||
- reg.type == BRW_REGISTER_TYPE_Q)
- brw_inst_set_imm_uq(devinfo, inst, reg.u64);
- else
- brw_inst_set_imm_ud(devinfo, inst, reg.ud);
-
- if (type_sz(reg.type) < 8) {
- brw_inst_set_src1_reg_file(devinfo, inst,
- BRW_ARCHITECTURE_REGISTER_FILE);
- brw_inst_set_src1_reg_hw_type(devinfo, inst,
- brw_inst_src0_reg_hw_type(devinfo, inst));
- }
+ if (devinfo->gen >= 12 &&
+ (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND ||
+ brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDC)) {
+ assert(reg.file != BRW_IMMEDIATE_VALUE);
+ assert(reg.address_mode == BRW_ADDRESS_DIRECT);
+ assert(reg.subnr == 0);
+ assert(brw_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1 ||
+ (reg.hstride == BRW_HORIZONTAL_STRIDE_1 &&
+ reg.vstride == reg.width + 1));
+ assert(!reg.negate && !reg.abs);
+ brw_inst_set_send_src0_reg_file(devinfo, inst, reg.file);
+ brw_inst_set_src0_da_reg_nr(devinfo, inst, reg.nr);
+
+ } else if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDS ||
+ brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDSC) {
+ assert(reg.file == BRW_GENERAL_REGISTER_FILE);
+ assert(reg.address_mode == BRW_ADDRESS_DIRECT);
+ assert(reg.subnr % 16 == 0);
+ assert(reg.hstride == BRW_HORIZONTAL_STRIDE_1 &&
+ reg.vstride == reg.width + 1);
+ assert(!reg.negate && !reg.abs);
+ brw_inst_set_src0_da_reg_nr(devinfo, inst, reg.nr);
+ brw_inst_set_src0_da16_subreg_nr(devinfo, inst, reg.subnr / 16);
} else {
- if (reg.address_mode == BRW_ADDRESS_DIRECT) {
- brw_inst_set_src0_da_reg_nr(devinfo, inst, reg.nr);
- if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
- brw_inst_set_src0_da1_subreg_nr(devinfo, inst, reg.subnr);
- } else {
- brw_inst_set_src0_da16_subreg_nr(devinfo, inst, reg.subnr / 16);
- }
+ brw_inst_set_src0_file_type(devinfo, inst, reg.file, reg.type);
+ brw_inst_set_src0_abs(devinfo, inst, reg.abs);
+ brw_inst_set_src0_negate(devinfo, inst, reg.negate);
+ brw_inst_set_src0_address_mode(devinfo, inst, reg.address_mode);
+
+ if (reg.file == BRW_IMMEDIATE_VALUE) {
+ if (reg.type == BRW_REGISTER_TYPE_DF ||
+ brw_inst_opcode(devinfo, inst) == BRW_OPCODE_DIM)
+ brw_inst_set_imm_df(devinfo, inst, reg.df);
+ else if (reg.type == BRW_REGISTER_TYPE_UQ ||
+ reg.type == BRW_REGISTER_TYPE_Q)
+ brw_inst_set_imm_uq(devinfo, inst, reg.u64);
+ else
+ brw_inst_set_imm_ud(devinfo, inst, reg.ud);
+
+ if (devinfo->gen < 12 && type_sz(reg.type) < 8) {
+ brw_inst_set_src1_reg_file(devinfo, inst,
+ BRW_ARCHITECTURE_REGISTER_FILE);
+ brw_inst_set_src1_reg_hw_type(devinfo, inst,
+ brw_inst_src0_reg_hw_type(devinfo, inst));
+ }
} else {
- brw_inst_set_src0_ia_subreg_nr(devinfo, inst, reg.subnr);
+ if (reg.address_mode == BRW_ADDRESS_DIRECT) {
+ brw_inst_set_src0_da_reg_nr(devinfo, inst, reg.nr);
+ if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
+ brw_inst_set_src0_da1_subreg_nr(devinfo, inst, reg.subnr);
+ } else {
+ brw_inst_set_src0_da16_subreg_nr(devinfo, inst, reg.subnr / 16);
+ }
+ } else {
+ brw_inst_set_src0_ia_subreg_nr(devinfo, inst, reg.subnr);
- if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
- brw_inst_set_src0_ia1_addr_imm(devinfo, inst, reg.indirect_offset);
- } else {
- brw_inst_set_src0_ia16_addr_imm(devinfo, inst, reg.indirect_offset);
- }
- }
+ if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
+ brw_inst_set_src0_ia1_addr_imm(devinfo, inst, reg.indirect_offset);
+ } else {
+ brw_inst_set_src0_ia16_addr_imm(devinfo, inst, reg.indirect_offset);
+ }
+ }
- if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
- if (reg.width == BRW_WIDTH_1 &&
- brw_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1) {
- brw_inst_set_src0_hstride(devinfo, inst, BRW_HORIZONTAL_STRIDE_0);
- brw_inst_set_src0_width(devinfo, inst, BRW_WIDTH_1);
- brw_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_0);
- } else {
- brw_inst_set_src0_hstride(devinfo, inst, reg.hstride);
- brw_inst_set_src0_width(devinfo, inst, reg.width);
- brw_inst_set_src0_vstride(devinfo, inst, reg.vstride);
- }
- } else {
- brw_inst_set_src0_da16_swiz_x(devinfo, inst,
- BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_X));
- brw_inst_set_src0_da16_swiz_y(devinfo, inst,
- BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Y));
- brw_inst_set_src0_da16_swiz_z(devinfo, inst,
- BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Z));
- brw_inst_set_src0_da16_swiz_w(devinfo, inst,
- BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_W));
-
- if (reg.vstride == BRW_VERTICAL_STRIDE_8) {
- /* This is an oddity of the fact we're using the same
- * descriptions for registers in align_16 as align_1:
- */
- brw_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4);
- } else if (devinfo->gen == 7 && !devinfo->is_haswell &&
- reg.type == BRW_REGISTER_TYPE_DF &&
- reg.vstride == BRW_VERTICAL_STRIDE_2) {
- /* From SNB PRM:
- *
- * "For Align16 access mode, only encodings of 0000 and 0011
- * are allowed. Other codes are reserved."
- *
- * Presumably the DevSNB behavior applies to IVB as well.
- */
- brw_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4);
+ if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
+ if (reg.width == BRW_WIDTH_1 &&
+ brw_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1) {
+ brw_inst_set_src0_hstride(devinfo, inst, BRW_HORIZONTAL_STRIDE_0);
+ brw_inst_set_src0_width(devinfo, inst, BRW_WIDTH_1);
+ brw_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_0);
+ } else {
+ brw_inst_set_src0_hstride(devinfo, inst, reg.hstride);
+ brw_inst_set_src0_width(devinfo, inst, reg.width);
+ brw_inst_set_src0_vstride(devinfo, inst, reg.vstride);
+ }
} else {
- brw_inst_set_src0_vstride(devinfo, inst, reg.vstride);
+ brw_inst_set_src0_da16_swiz_x(devinfo, inst,
+ BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_X));
+ brw_inst_set_src0_da16_swiz_y(devinfo, inst,
+ BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Y));
+ brw_inst_set_src0_da16_swiz_z(devinfo, inst,
+ BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Z));
+ brw_inst_set_src0_da16_swiz_w(devinfo, inst,
+ BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_W));
+
+ if (reg.vstride == BRW_VERTICAL_STRIDE_8) {
+ /* This is an oddity of the fact we're using the same
+ * descriptions for registers in align_16 as align_1:
+ */
+ brw_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4);
+ } else if (devinfo->gen == 7 && !devinfo->is_haswell &&
+ reg.type == BRW_REGISTER_TYPE_DF &&
+ reg.vstride == BRW_VERTICAL_STRIDE_2) {
+ /* From SNB PRM:
+ *
+ * "For Align16 access mode, only encodings of 0000 and 0011
+ * are allowed. Other codes are reserved."
+ *
+ * Presumably the DevSNB behavior applies to IVB as well.
+ */
+ brw_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4);
+ } else {
+ brw_inst_set_src0_vstride(devinfo, inst, reg.vstride);
+ }
}
}
}
if (reg.file == BRW_GENERAL_REGISTER_FILE)
assert(reg.nr < 128);
- /* From the IVB PRM Vol. 4, Pt. 3, Section 3.3.3.5:
- *
- * "Accumulator registers may be accessed explicitly as src0
- * operands only."
- */
- assert(reg.file != BRW_ARCHITECTURE_REGISTER_FILE ||
- reg.nr != BRW_ARF_ACCUMULATOR);
-
- gen7_convert_mrf_to_grf(p, &reg);
- assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
+ if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDS ||
+ brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDSC ||
+ (devinfo->gen >= 12 &&
+ (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND ||
+ brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDC))) {
+ assert(reg.file == BRW_GENERAL_REGISTER_FILE ||
+ reg.file == BRW_ARCHITECTURE_REGISTER_FILE);
+ assert(reg.address_mode == BRW_ADDRESS_DIRECT);
+ assert(reg.subnr == 0);
+ assert(brw_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1 ||
+ (reg.hstride == BRW_HORIZONTAL_STRIDE_1 &&
+ reg.vstride == reg.width + 1));
+ assert(!reg.negate && !reg.abs);
+ brw_inst_set_send_src1_reg_nr(devinfo, inst, reg.nr);
+ brw_inst_set_send_src1_reg_file(devinfo, inst, reg.file);
+ } else {
+ /* From the IVB PRM Vol. 4, Pt. 3, Section 3.3.3.5:
+ *
+ * "Accumulator registers may be accessed explicitly as src0
+ * operands only."
+ */
+ assert(reg.file != BRW_ARCHITECTURE_REGISTER_FILE ||
+ reg.nr != BRW_ARF_ACCUMULATOR);
- brw_inst_set_src1_file_type(devinfo, inst, reg.file, reg.type);
- brw_inst_set_src1_abs(devinfo, inst, reg.abs);
- brw_inst_set_src1_negate(devinfo, inst, reg.negate);
+ gen7_convert_mrf_to_grf(p, &reg);
+ assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
- /* Only src1 can be immediate in two-argument instructions.
- */
- assert(brw_inst_src0_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE);
+ brw_inst_set_src1_file_type(devinfo, inst, reg.file, reg.type);
+ brw_inst_set_src1_abs(devinfo, inst, reg.abs);
+ brw_inst_set_src1_negate(devinfo, inst, reg.negate);
- if (reg.file == BRW_IMMEDIATE_VALUE) {
- /* two-argument instructions can only use 32-bit immediates */
- assert(type_sz(reg.type) < 8);
- brw_inst_set_imm_ud(devinfo, inst, reg.ud);
- } else {
- /* This is a hardware restriction, which may or may not be lifted
- * in the future:
+ /* Only src1 can be immediate in two-argument instructions.
*/
- assert (reg.address_mode == BRW_ADDRESS_DIRECT);
- /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
+ assert(brw_inst_src0_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE);
- brw_inst_set_src1_da_reg_nr(devinfo, inst, reg.nr);
- if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
- brw_inst_set_src1_da1_subreg_nr(devinfo, inst, reg.subnr);
+ if (reg.file == BRW_IMMEDIATE_VALUE) {
+ /* two-argument instructions can only use 32-bit immediates */
+ assert(type_sz(reg.type) < 8);
+ brw_inst_set_imm_ud(devinfo, inst, reg.ud);
} else {
- brw_inst_set_src1_da16_subreg_nr(devinfo, inst, reg.subnr / 16);
- }
+ /* This is a hardware restriction, which may or may not be lifted
+ * in the future:
+ */
+ assert (reg.address_mode == BRW_ADDRESS_DIRECT);
+ /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
- if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
- if (reg.width == BRW_WIDTH_1 &&
- brw_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1) {
- brw_inst_set_src1_hstride(devinfo, inst, BRW_HORIZONTAL_STRIDE_0);
- brw_inst_set_src1_width(devinfo, inst, BRW_WIDTH_1);
- brw_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_0);
- } else {
- brw_inst_set_src1_hstride(devinfo, inst, reg.hstride);
- brw_inst_set_src1_width(devinfo, inst, reg.width);
- brw_inst_set_src1_vstride(devinfo, inst, reg.vstride);
- }
- } else {
- brw_inst_set_src1_da16_swiz_x(devinfo, inst,
- BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_X));
- brw_inst_set_src1_da16_swiz_y(devinfo, inst,
- BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Y));
- brw_inst_set_src1_da16_swiz_z(devinfo, inst,
- BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Z));
- brw_inst_set_src1_da16_swiz_w(devinfo, inst,
- BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_W));
-
- if (reg.vstride == BRW_VERTICAL_STRIDE_8) {
- /* This is an oddity of the fact we're using the same
- * descriptions for registers in align_16 as align_1:
- */
- brw_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4);
- } else if (devinfo->gen == 7 && !devinfo->is_haswell &&
- reg.type == BRW_REGISTER_TYPE_DF &&
- reg.vstride == BRW_VERTICAL_STRIDE_2) {
- /* From SNB PRM:
- *
- * "For Align16 access mode, only encodings of 0000 and 0011
- * are allowed. Other codes are reserved."
- *
- * Presumably the DevSNB behavior applies to IVB as well.
- */
- brw_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4);
+ brw_inst_set_src1_da_reg_nr(devinfo, inst, reg.nr);
+ if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
+ brw_inst_set_src1_da1_subreg_nr(devinfo, inst, reg.subnr);
+ } else {
+ brw_inst_set_src1_da16_subreg_nr(devinfo, inst, reg.subnr / 16);
+ }
+
+ if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
+ if (reg.width == BRW_WIDTH_1 &&
+ brw_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1) {
+ brw_inst_set_src1_hstride(devinfo, inst, BRW_HORIZONTAL_STRIDE_0);
+ brw_inst_set_src1_width(devinfo, inst, BRW_WIDTH_1);
+ brw_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_0);
+ } else {
+ brw_inst_set_src1_hstride(devinfo, inst, reg.hstride);
+ brw_inst_set_src1_width(devinfo, inst, reg.width);
+ brw_inst_set_src1_vstride(devinfo, inst, reg.vstride);
+ }
} else {
- brw_inst_set_src1_vstride(devinfo, inst, reg.vstride);
+ brw_inst_set_src1_da16_swiz_x(devinfo, inst,
+ BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_X));
+ brw_inst_set_src1_da16_swiz_y(devinfo, inst,
+ BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Y));
+ brw_inst_set_src1_da16_swiz_z(devinfo, inst,
+ BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Z));
+ brw_inst_set_src1_da16_swiz_w(devinfo, inst,
+ BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_W));
+
+ if (reg.vstride == BRW_VERTICAL_STRIDE_8) {
+ /* This is an oddity of the fact we're using the same
+ * descriptions for registers in align_16 as align_1:
+ */
+ brw_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4);
+ } else if (devinfo->gen == 7 && !devinfo->is_haswell &&
+ reg.type == BRW_REGISTER_TYPE_DF &&
+ reg.vstride == BRW_VERTICAL_STRIDE_2) {
+ /* From SNB PRM:
+ *
+ * "For Align16 access mode, only encodings of 0000 and 0011
+ * are allowed. Other codes are reserved."
+ *
+ * Presumably the DevSNB behavior applies to IVB as well.
+ */
+ brw_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4);
+ } else {
+ brw_inst_set_src1_vstride(devinfo, inst, reg.vstride);
+ }
}
}
}
const struct gen_device_info *devinfo = p->devinfo;
assert(brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND ||
brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDC);
- brw_inst_set_src1_file_type(devinfo, inst,
- BRW_IMMEDIATE_VALUE, BRW_REGISTER_TYPE_UD);
+ if (devinfo->gen < 12)
+ brw_inst_set_src1_file_type(devinfo, inst,
+ BRW_IMMEDIATE_VALUE, BRW_REGISTER_TYPE_UD);
brw_inst_set_send_desc(devinfo, inst, desc);
if (devinfo->gen >= 9)
brw_inst_set_send_ex_desc(devinfo, inst, ex_desc);
}
static enum gen10_align1_3src_vertical_stride
-to_3src_align1_vstride(enum brw_vertical_stride vstride)
+to_3src_align1_vstride(const struct gen_device_info *devinfo,
+ enum brw_vertical_stride vstride)
{
switch (vstride) {
case BRW_VERTICAL_STRIDE_0:
return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_0;
+ case BRW_VERTICAL_STRIDE_1:
+ assert(devinfo->gen >= 12);
+ return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_1;
case BRW_VERTICAL_STRIDE_2:
+ assert(devinfo->gen < 12);
return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_2;
case BRW_VERTICAL_STRIDE_4:
return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_4;
gen7_convert_mrf_to_grf(p, &dest);
assert(dest.nr < 128);
- assert(src0.file != BRW_IMMEDIATE_VALUE || src0.nr < 128);
- assert(src1.file != BRW_IMMEDIATE_VALUE || src1.nr < 128);
- assert(src2.file != BRW_IMMEDIATE_VALUE || src2.nr < 128);
+ assert(src0.file == BRW_IMMEDIATE_VALUE || src0.nr < 128);
+ assert(src1.file != BRW_IMMEDIATE_VALUE && src1.nr < 128);
+ assert(src2.file == BRW_IMMEDIATE_VALUE || src2.nr < 128);
assert(dest.address_mode == BRW_ADDRESS_DIRECT);
assert(src0.address_mode == BRW_ADDRESS_DIRECT);
assert(src1.address_mode == BRW_ADDRESS_DIRECT);
assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
dest.file == BRW_ARCHITECTURE_REGISTER_FILE);
- if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE) {
- brw_inst_set_3src_a1_dst_reg_file(devinfo, inst,
- BRW_ALIGN1_3SRC_ACCUMULATOR);
- brw_inst_set_3src_dst_reg_nr(devinfo, inst, BRW_ARF_ACCUMULATOR);
- } else {
- brw_inst_set_3src_a1_dst_reg_file(devinfo, inst,
- BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE);
+ if (devinfo->gen >= 12) {
+ brw_inst_set_3src_a1_dst_reg_file(devinfo, inst, dest.file);
brw_inst_set_3src_dst_reg_nr(devinfo, inst, dest.nr);
+ } else {
+ if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE) {
+ brw_inst_set_3src_a1_dst_reg_file(devinfo, inst,
+ BRW_ALIGN1_3SRC_ACCUMULATOR);
+ brw_inst_set_3src_dst_reg_nr(devinfo, inst, BRW_ARF_ACCUMULATOR);
+ } else {
+ brw_inst_set_3src_a1_dst_reg_file(devinfo, inst,
+ BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE);
+ brw_inst_set_3src_dst_reg_nr(devinfo, inst, dest.nr);
+ }
}
brw_inst_set_3src_a1_dst_subreg_nr(devinfo, inst, dest.subnr / 8);
brw_inst_set_3src_a1_src1_type(devinfo, inst, src1.type);
brw_inst_set_3src_a1_src2_type(devinfo, inst, src2.type);
- brw_inst_set_3src_a1_src0_vstride(devinfo, inst,
- to_3src_align1_vstride(src0.vstride));
- brw_inst_set_3src_a1_src1_vstride(devinfo, inst,
- to_3src_align1_vstride(src1.vstride));
+ brw_inst_set_3src_a1_src0_vstride(
+ devinfo, inst, to_3src_align1_vstride(devinfo, src0.vstride));
+ brw_inst_set_3src_a1_src1_vstride(
+ devinfo, inst, to_3src_align1_vstride(devinfo, src1.vstride));
/* no vstride on src2 */
brw_inst_set_3src_a1_src0_hstride(devinfo, inst,
assert(src2.file == BRW_GENERAL_REGISTER_FILE ||
src2.file == BRW_IMMEDIATE_VALUE);
- brw_inst_set_3src_a1_src0_reg_file(devinfo, inst,
- src0.file == BRW_GENERAL_REGISTER_FILE ?
- BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE :
- BRW_ALIGN1_3SRC_IMMEDIATE_VALUE);
- brw_inst_set_3src_a1_src1_reg_file(devinfo, inst,
- src1.file == BRW_GENERAL_REGISTER_FILE ?
- BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE :
- BRW_ALIGN1_3SRC_ACCUMULATOR);
- brw_inst_set_3src_a1_src2_reg_file(devinfo, inst,
- src2.file == BRW_GENERAL_REGISTER_FILE ?
- BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE :
- BRW_ALIGN1_3SRC_IMMEDIATE_VALUE);
+ if (devinfo->gen >= 12) {
+ brw_inst_set_3src_a1_src0_reg_file(devinfo, inst, src0.file);
+ brw_inst_set_3src_a1_src1_reg_file(devinfo, inst, src1.file);
+ brw_inst_set_3src_a1_src2_reg_file(devinfo, inst, src2.file);
+ } else {
+ brw_inst_set_3src_a1_src0_reg_file(devinfo, inst,
+ src0.file == BRW_GENERAL_REGISTER_FILE ?
+ BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE :
+ BRW_ALIGN1_3SRC_IMMEDIATE_VALUE);
+ brw_inst_set_3src_a1_src1_reg_file(devinfo, inst,
+ src1.file == BRW_GENERAL_REGISTER_FILE ?
+ BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE :
+ BRW_ALIGN1_3SRC_ACCUMULATOR);
+ brw_inst_set_3src_a1_src2_reg_file(devinfo, inst,
+ src2.file == BRW_GENERAL_REGISTER_FILE ?
+ BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE :
+ BRW_ALIGN1_3SRC_IMMEDIATE_VALUE);
+ }
+
} else {
assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
dest.file == BRW_MESSAGE_REGISTER_FILE);
assert(dest.type == BRW_REGISTER_TYPE_F ||
dest.type == BRW_REGISTER_TYPE_DF ||
dest.type == BRW_REGISTER_TYPE_D ||
- dest.type == BRW_REGISTER_TYPE_UD);
+ dest.type == BRW_REGISTER_TYPE_UD ||
+ (dest.type == BRW_REGISTER_TYPE_HF && devinfo->gen >= 8));
if (devinfo->gen == 6) {
brw_inst_set_3src_a16_dst_reg_file(devinfo, inst,
dest.file == BRW_MESSAGE_REGISTER_FILE);
*/
brw_inst_set_3src_a16_src_type(devinfo, inst, dest.type);
brw_inst_set_3src_a16_dst_type(devinfo, inst, dest.type);
+
+ /* From the Bspec, 3D Media GPGPU, Instruction fields, srcType:
+ *
+ * "Three source instructions can use operands with mixed-mode
+ * precision. When SrcType field is set to :f or :hf it defines
+ * precision for source 0 only, and fields Src1Type and Src2Type
+ * define precision for other source operands:
+ *
+ * 0b = :f. Single precision Float (32-bit).
+ * 1b = :hf. Half precision Float (16-bit)."
+ */
+ if (src1.type == BRW_REGISTER_TYPE_HF)
+ brw_inst_set_3src_a16_src1_type(devinfo, inst, 1);
+
+ if (src2.type == BRW_REGISTER_TYPE_HF)
+ brw_inst_set_3src_a16_src2_type(devinfo, inst, 1);
}
}
ALU2(SHL)
ALU1(DIM)
ALU2(ASR)
+ALU2(ROL)
+ALU2(ROR)
ALU3(CSEL)
ALU1(FRC)
ALU1(RNDD)
}
if (needs_zero_fill) {
- brw_inst_set_no_dd_clear(devinfo, inst, true);
+ if (devinfo->gen < 12)
+ brw_inst_set_no_dd_clear(devinfo, inst, true);
inst = brw_MOV(p, suboffset(dst, 1), brw_imm_w(0));
- brw_inst_set_no_dd_check(devinfo, inst, true);
+ if (devinfo->gen < 12)
+ brw_inst_set_no_dd_check(devinfo, inst, true);
}
brw_pop_insn_state(p);
brw_inst_set_uip(devinfo, insn, 0);
} else {
brw_set_dest(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
- brw_set_src0(p, insn, brw_imm_d(0));
+ if (devinfo->gen < 12)
+ brw_set_src0(p, insn, brw_imm_d(0));
brw_inst_set_jip(devinfo, insn, 0);
brw_inst_set_uip(devinfo, insn, 0);
}
brw_inst_set_uip(devinfo, insn, 0);
} else {
brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
- brw_set_src0(p, insn, brw_imm_d(0));
+ if (devinfo->gen < 12)
+ brw_set_src0(p, insn, brw_imm_d(0));
brw_inst_set_jip(devinfo, insn, 0);
brw_inst_set_uip(devinfo, insn, 0);
}
insn = next_insn(p, BRW_OPCODE_HALT);
brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
- if (devinfo->gen >= 8) {
- brw_set_src0(p, insn, brw_imm_d(0x0));
- } else {
+ if (devinfo->gen < 8) {
brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
brw_set_src1(p, insn, brw_imm_d(0x0)); /* UIP and JIP, updated later. */
+ } else if (devinfo->gen < 12) {
+ brw_set_src0(p, insn, brw_imm_d(0x0));
}
brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
if (devinfo->gen >= 8) {
brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
- brw_set_src0(p, insn, brw_imm_d(0));
+ if (devinfo->gen < 12)
+ brw_set_src0(p, insn, brw_imm_d(0));
brw_inst_set_jip(devinfo, insn, br * (do_insn - insn));
} else if (devinfo->gen == 7) {
brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
assert(src1.file == BRW_GENERAL_REGISTER_FILE ||
(devinfo->gen >= 8 && src1.file == BRW_IMMEDIATE_VALUE));
} else {
- assert(src0.type == BRW_REGISTER_TYPE_F);
- assert(src1.type == BRW_REGISTER_TYPE_F);
+ assert(src0.type == BRW_REGISTER_TYPE_F ||
+ (src0.type == BRW_REGISTER_TYPE_HF && devinfo->gen >= 9));
+ assert(src1.type == BRW_REGISTER_TYPE_F ||
+ (src1.type == BRW_REGISTER_TYPE_HF && devinfo->gen >= 9));
}
/* Source modifiers are ignored for extended math instructions on Gen6. */
struct brw_reg dst,
struct brw_reg payload,
struct brw_reg desc,
- unsigned desc_imm)
+ unsigned desc_imm,
+ bool eot)
{
const struct gen_device_info *devinfo = p->devinfo;
struct brw_inst *send;
if (desc.file == BRW_IMMEDIATE_VALUE) {
send = next_insn(p, BRW_OPCODE_SEND);
+ brw_set_src0(p, send, retype(payload, BRW_REGISTER_TYPE_UD));
brw_set_desc(p, send, desc.ud | desc_imm);
-
} else {
struct brw_reg addr = retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD);
brw_pop_insn_state(p);
send = next_insn(p, BRW_OPCODE_SEND);
- brw_set_src1(p, send, addr);
+ brw_set_src0(p, send, retype(payload, BRW_REGISTER_TYPE_UD));
+
+ if (devinfo->gen >= 12)
+ brw_inst_set_send_sel_reg32_desc(devinfo, send, true);
+ else
+ brw_set_src1(p, send, addr);
}
brw_set_dest(p, send, dst);
- brw_set_src0(p, send, retype(payload, BRW_REGISTER_TYPE_UD));
brw_inst_set_sfid(devinfo, send, sfid);
+ brw_inst_set_eot(devinfo, send, eot);
+}
+
+/**
+ * Emit a split send: SENDS before Gen12, SEND on Gen12 and later.
+ *
+ * \param p            code generator state the instruction is appended to
+ * \param sfid         shared function ID written into the instruction
+ * \param dst          destination register (retyped to UW before use)
+ * \param payload0     first message payload, bound to src0 as UD
+ * \param payload1     second message payload, bound to src1 as UD
+ * \param desc         message descriptor; either an immediate or a register
+ *                     (must be of type UD)
+ * \param desc_imm     extra descriptor bits ORed into \p desc
+ * \param ex_desc      extended descriptor; either an immediate or a register
+ * \param ex_desc_imm  extra extended-descriptor bits ORed into \p ex_desc
+ * \param eot          whether the message is flagged end-of-thread
+ *
+ * A non-immediate \p desc is combined with \p desc_imm into address
+ * register 0 and the instruction is told to fetch the descriptor from
+ * there.  The extended descriptor is handled the same way through address
+ * register 2, and that path is also taken for an immediate extended
+ * descriptor whose value cannot be represented in the instruction.
+ */
+void
+brw_send_indirect_split_message(struct brw_codegen *p,
+                                unsigned sfid,
+                                struct brw_reg dst,
+                                struct brw_reg payload0,
+                                struct brw_reg payload1,
+                                struct brw_reg desc,
+                                unsigned desc_imm,
+                                struct brw_reg ex_desc,
+                                unsigned ex_desc_imm,
+                                bool eot)
+{
+   const struct gen_device_info *devinfo = p->devinfo;
+   struct brw_inst *send;
+
+   dst = retype(dst, BRW_REGISTER_TYPE_UW);
+
+   assert(desc.type == BRW_REGISTER_TYPE_UD);
+
+   if (desc.file == BRW_IMMEDIATE_VALUE) {
+      desc.ud |= desc_imm;
+   } else {
+      struct brw_reg addr = retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD);
+
+      brw_push_insn_state(p);
+      brw_set_default_access_mode(p, BRW_ALIGN_1);
+      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
+      brw_set_default_exec_size(p, BRW_EXECUTE_1);
+      brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
+
+      /* Load the indirect descriptor to an address register using OR so the
+       * caller can specify additional descriptor bits with the desc_imm
+       * immediate.
+       */
+      brw_OR(p, addr, desc, brw_imm_ud(desc_imm));
+
+      brw_pop_insn_state(p);
+      desc = addr;
+   }
+
+   /* The immediate form is only usable when bits 15:12 of the *combined*
+    * extended descriptor are clear.  ex_desc_imm is ORed into the value
+    * below, so it has to take part in the check as well; otherwise bits
+    * contributed by ex_desc_imm would be handed to an encoding that has no
+    * room for them.
+    */
+   if (ex_desc.file == BRW_IMMEDIATE_VALUE &&
+       ((ex_desc.ud | ex_desc_imm) & INTEL_MASK(15, 12)) == 0) {
+      ex_desc.ud |= ex_desc_imm;
+   } else {
+      struct brw_reg addr = retype(brw_address_reg(2), BRW_REGISTER_TYPE_UD);
+
+      brw_push_insn_state(p);
+      brw_set_default_access_mode(p, BRW_ALIGN_1);
+      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
+      brw_set_default_exec_size(p, BRW_EXECUTE_1);
+      brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
+
+      /* Load the indirect extended descriptor to an address register using OR
+       * so the caller can specify additional descriptor bits with the
+       * ex_desc_imm immediate.
+       *
+       * Even though the instruction dispatcher always pulls the SFID and EOT
+       * fields from the instruction itself, actual external unit which
+       * processes the message gets the SFID and EOT from the extended
+       * descriptor which comes from the address register.  If we don't OR
+       * those two bits in, the external unit may get confused and hang.
+       */
+      unsigned imm_part = ex_desc_imm | sfid | eot << 5;
+
+      if (ex_desc.file == BRW_IMMEDIATE_VALUE) {
+         /* ex_desc bits 15:12 don't exist in the instruction encoding, so
+          * we may have fallen back to an indirect extended descriptor.
+          */
+         brw_MOV(p, addr, brw_imm_ud(ex_desc.ud | imm_part));
+      } else {
+         brw_OR(p, addr, ex_desc, brw_imm_ud(imm_part));
+      }
+
+      brw_pop_insn_state(p);
+      ex_desc = addr;
+   }
+
+   send = next_insn(p, devinfo->gen >= 12 ? BRW_OPCODE_SEND : BRW_OPCODE_SENDS);
+   brw_set_dest(p, send, dst);
+   brw_set_src0(p, send, retype(payload0, BRW_REGISTER_TYPE_UD));
+   brw_set_src1(p, send, retype(payload1, BRW_REGISTER_TYPE_UD));
+
+   /* Descriptor: either encoded inline, or selected from the address
+    * register that was loaded above.
+    */
+   if (desc.file == BRW_IMMEDIATE_VALUE) {
+      brw_inst_set_send_sel_reg32_desc(devinfo, send, 0);
+      brw_inst_set_send_desc(devinfo, send, desc.ud);
+   } else {
+      assert(desc.file == BRW_ARCHITECTURE_REGISTER_FILE);
+      assert(desc.nr == BRW_ARF_ADDRESS);
+      assert(desc.subnr == 0);
+      brw_inst_set_send_sel_reg32_desc(devinfo, send, 1);
+   }
+
+   /* Extended descriptor: same choice; the register form records which
+    * address subregister (in units of four bytes) holds the value.
+    */
+   if (ex_desc.file == BRW_IMMEDIATE_VALUE) {
+      brw_inst_set_send_sel_reg32_ex_desc(devinfo, send, 0);
+      brw_inst_set_sends_ex_desc(devinfo, send, ex_desc.ud);
+   } else {
+      assert(ex_desc.file == BRW_ARCHITECTURE_REGISTER_FILE);
+      assert(ex_desc.nr == BRW_ARF_ADDRESS);
+      assert((ex_desc.subnr & 0x3) == 0);
+      brw_inst_set_send_sel_reg32_ex_desc(devinfo, send, 1);
+      brw_inst_set_send_ex_desc_ia_subreg_nr(devinfo, send, ex_desc.subnr >> 2);
+   }
+
+   brw_inst_set_sfid(devinfo, send, sfid);
+   brw_inst_set_eot(devinfo, send, eot);
}
static void
surface = addr;
}
- brw_send_indirect_message(p, sfid, dst, payload, surface, desc_imm);
+ brw_send_indirect_message(p, sfid, dst, payload, surface, desc_imm, false);
}
static bool
case BRW_OPCODE_HALT:
if (depth == 0)
return offset;
+ default:
+ break;
}
}
assert(brw_inst_uip(devinfo, insn) != 0);
assert(brw_inst_jip(devinfo, insn) != 0);
break;
+
+ default:
+ break;
}
}
}
payload, surface, desc);
}
-void
-brw_untyped_atomic_float(struct brw_codegen *p,
- struct brw_reg dst,
- struct brw_reg payload,
- struct brw_reg surface,
- unsigned atomic_op,
- unsigned msg_length,
- bool response_expected,
- bool header_present)
-{
- const struct gen_device_info *devinfo = p->devinfo;
-
- assert(devinfo->gen >= 9);
- assert(brw_get_default_access_mode(p) == BRW_ALIGN_1);
-
- const unsigned sfid = HSW_SFID_DATAPORT_DATA_CACHE_1;
- const unsigned exec_size = 1 << brw_get_default_exec_size(p);
- const unsigned response_length =
- brw_surface_payload_size(p, response_expected, exec_size);
- const unsigned desc =
- brw_message_desc(devinfo, msg_length, response_length, header_present) |
- brw_dp_untyped_atomic_float_desc(devinfo, exec_size, atomic_op,
- response_expected);
-
- brw_send_indirect_surface_message(p, sfid,
- brw_writemask(dst, WRITEMASK_XYZW),
- payload, surface, desc);
-}
-
void
brw_untyped_surface_read(struct brw_codegen *p,
struct brw_reg dst,
payload, surface, desc);
}
-void
-brw_byte_scattered_read(struct brw_codegen *p,
- struct brw_reg dst,
- struct brw_reg payload,
- struct brw_reg surface,
- unsigned msg_length,
- unsigned bit_size)
-{
- const struct gen_device_info *devinfo = p->devinfo;
- assert(devinfo->gen > 7 || devinfo->is_haswell);
- assert(brw_get_default_access_mode(p) == BRW_ALIGN_1);
- const unsigned exec_size = 1 << brw_get_default_exec_size(p);
- const unsigned response_length = brw_surface_payload_size(p, 1, exec_size);
- const unsigned desc =
- brw_message_desc(devinfo, msg_length, response_length, false) |
- brw_dp_byte_scattered_rw_desc(devinfo, exec_size, bit_size, false);
-
- brw_send_indirect_surface_message(p, GEN7_SFID_DATAPORT_DATA_CACHE,
- dst, payload, surface, desc);
-}
-
-void
-brw_byte_scattered_write(struct brw_codegen *p,
- struct brw_reg payload,
- struct brw_reg surface,
- unsigned msg_length,
- unsigned bit_size,
- bool header_present)
-{
- const struct gen_device_info *devinfo = p->devinfo;
- assert(devinfo->gen > 7 || devinfo->is_haswell);
- assert(brw_get_default_access_mode(p) == BRW_ALIGN_1);
- const unsigned exec_size = 1 << brw_get_default_exec_size(p);
- const unsigned desc =
- brw_message_desc(devinfo, msg_length, 0, header_present) |
- brw_dp_byte_scattered_rw_desc(devinfo, exec_size, bit_size, true);
-
- brw_send_indirect_surface_message(p, GEN7_SFID_DATAPORT_DATA_CACHE,
- brw_writemask(brw_null_reg(),
- WRITEMASK_XYZW),
- payload, surface, desc);
-}
-
-void
-brw_typed_atomic(struct brw_codegen *p,
- struct brw_reg dst,
- struct brw_reg payload,
- struct brw_reg surface,
- unsigned atomic_op,
- unsigned msg_length,
- bool response_expected,
- bool header_present) {
- const struct gen_device_info *devinfo = p->devinfo;
- const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ?
- HSW_SFID_DATAPORT_DATA_CACHE_1 :
- GEN6_SFID_DATAPORT_RENDER_CACHE);
- const bool align1 = brw_get_default_access_mode(p) == BRW_ALIGN_1;
- /* SIMD4x2 typed atomic instructions only exist on HSW+ */
- const bool has_simd4x2 = devinfo->gen >= 8 || devinfo->is_haswell;
- const unsigned exec_size = align1 ? 1 << brw_get_default_exec_size(p) :
- has_simd4x2 ? 0 : 8;
- /* Typed atomics don't support SIMD16 */
- assert(exec_size <= 8);
- const unsigned response_length =
- brw_surface_payload_size(p, response_expected, exec_size);
- const unsigned desc =
- brw_message_desc(devinfo, msg_length, response_length, header_present) |
- brw_dp_typed_atomic_desc(devinfo, exec_size, brw_get_default_group(p),
- atomic_op, response_expected);
- /* Mask out unused components -- See comment in brw_untyped_atomic(). */
- const unsigned mask = align1 ? WRITEMASK_XYZW : WRITEMASK_X;
-
- brw_send_indirect_surface_message(p, sfid, brw_writemask(dst, mask),
- payload, surface, desc);
-}
-
-void
-brw_typed_surface_read(struct brw_codegen *p,
- struct brw_reg dst,
- struct brw_reg payload,
- struct brw_reg surface,
- unsigned msg_length,
- unsigned num_channels,
- bool header_present)
-{
- const struct gen_device_info *devinfo = p->devinfo;
- const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ?
- HSW_SFID_DATAPORT_DATA_CACHE_1 :
- GEN6_SFID_DATAPORT_RENDER_CACHE);
- const bool align1 = brw_get_default_access_mode(p) == BRW_ALIGN_1;
- /* SIMD4x2 typed read instructions only exist on HSW+ */
- const bool has_simd4x2 = devinfo->gen >= 8 || devinfo->is_haswell;
- const unsigned exec_size = align1 ? 1 << brw_get_default_exec_size(p) :
- has_simd4x2 ? 0 : 8;
- const unsigned response_length =
- brw_surface_payload_size(p, num_channels, exec_size);
- const unsigned desc =
- brw_message_desc(devinfo, msg_length, response_length, header_present) |
- brw_dp_typed_surface_rw_desc(devinfo, exec_size, brw_get_default_group(p),
- num_channels, false);
-
- brw_send_indirect_surface_message(p, sfid, dst, payload, surface, desc);
-}
-
-void
-brw_typed_surface_write(struct brw_codegen *p,
- struct brw_reg payload,
- struct brw_reg surface,
- unsigned msg_length,
- unsigned num_channels,
- bool header_present)
-{
- const struct gen_device_info *devinfo = p->devinfo;
- const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ?
- HSW_SFID_DATAPORT_DATA_CACHE_1 :
- GEN6_SFID_DATAPORT_RENDER_CACHE);
- const bool align1 = brw_get_default_access_mode(p) == BRW_ALIGN_1;
- /* SIMD4x2 typed read instructions only exist on HSW+ */
- const bool has_simd4x2 = devinfo->gen >= 8 || devinfo->is_haswell;
- const unsigned exec_size = align1 ? 1 << brw_get_default_exec_size(p) :
- has_simd4x2 ? 0 : 8;
- const unsigned desc =
- brw_message_desc(devinfo, msg_length, 0, header_present) |
- brw_dp_typed_surface_rw_desc(devinfo, exec_size, brw_get_default_group(p),
- num_channels, true);
- /* Mask out unused components -- See comment in brw_untyped_atomic(). */
- const unsigned mask = !has_simd4x2 && !align1 ? WRITEMASK_X : WRITEMASK_XYZW;
-
- brw_send_indirect_surface_message(p, sfid, brw_writemask(brw_null_reg(), mask),
- payload, surface, desc);
-}
-
static void
brw_set_memory_fence_message(struct brw_codegen *p,
struct brw_inst *insn,
enum brw_message_target sfid,
- bool commit_enable)
+ bool commit_enable,
+ unsigned bti)
{
const struct gen_device_info *devinfo = p->devinfo;
if (commit_enable)
brw_inst_set_dp_msg_control(devinfo, insn, 1 << 5);
+
+ assert(devinfo->gen >= 11 || bti == 0);
+ brw_inst_set_binding_table_index(devinfo, insn, bti);
}
void
brw_memory_fence(struct brw_codegen *p,
struct brw_reg dst,
- enum opcode send_op)
+ struct brw_reg src,
+ enum opcode send_op,
+ bool stall,
+ unsigned bti)
{
const struct gen_device_info *devinfo = p->devinfo;
- const bool commit_enable =
+ const bool commit_enable = stall ||
devinfo->gen >= 10 || /* HSD ES # 1404612949 */
(devinfo->gen == 7 && !devinfo->is_haswell);
struct brw_inst *insn;
brw_push_insn_state(p);
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
brw_set_default_exec_size(p, BRW_EXECUTE_1);
- dst = vec1(dst);
+ dst = retype(vec1(dst), BRW_REGISTER_TYPE_UW);
+ src = retype(vec1(src), BRW_REGISTER_TYPE_UD);
/* Set dst as destination for dependency tracking, the MEMORY_FENCE
* message doesn't write anything back.
*/
insn = next_insn(p, send_op);
- dst = retype(dst, BRW_REGISTER_TYPE_UW);
brw_set_dest(p, insn, dst);
- brw_set_src0(p, insn, dst);
+ brw_set_src0(p, insn, src);
brw_set_memory_fence_message(p, insn, GEN7_SFID_DATAPORT_DATA_CACHE,
- commit_enable);
+ commit_enable, bti);
if (devinfo->gen == 7 && !devinfo->is_haswell) {
/* IVB does typed surface access through the render cache, so we need to
*/
insn = next_insn(p, send_op);
brw_set_dest(p, insn, offset(dst, 1));
- brw_set_src0(p, insn, offset(dst, 1));
+ brw_set_src0(p, insn, src);
brw_set_memory_fence_message(p, insn, GEN6_SFID_DATAPORT_RENDER_CACHE,
- commit_enable);
+ commit_enable, bti);
/* Now write the response of the second message into the response of the
* first to trigger a pipeline stall -- This way future render and data
brw_MOV(p, dst, offset(dst, 1));
}
+ if (stall)
+ brw_MOV(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UW), dst);
+
brw_pop_insn_state(p);
}
dest,
mrf,
vec1(data),
- desc);
+ desc,
+ false);
}
void
brw_inst_set_mask_control(devinfo, insn, BRW_MASK_DISABLE);
}
-/**
- * Changes the floating point rounding mode updating the control register
- * field defined at cr0.0[5-6] bits. This function supports the changes to
- * RTNE (00), RU (01), RD (10) and RTZ (11) rounding using bitwise operations.
- * Only RTNE and RTZ rounding are enabled at nir.
- */
void
-brw_rounding_mode(struct brw_codegen *p,
- enum brw_rnd_mode mode)
-{
- const unsigned bits = mode << BRW_CR0_RND_MODE_SHIFT;
-
- if (bits != BRW_CR0_RND_MODE_MASK) {
- brw_inst *inst = brw_AND(p, brw_cr0_reg(0), brw_cr0_reg(0),
- brw_imm_ud(~BRW_CR0_RND_MODE_MASK));
- brw_inst_set_exec_size(p->devinfo, inst, BRW_EXECUTE_1);
-
- /* From the Skylake PRM, Volume 7, page 760:
- * "Implementation Restriction on Register Access: When the control
- * register is used as an explicit source and/or destination, hardware
- * does not ensure execution pipeline coherency. Software must set the
- * thread control field to ‘switch’ for an instruction that uses
- * control register as an explicit operand."
- */
+brw_float_controls_mode(struct brw_codegen *p,
+ unsigned mode, unsigned mask)
+{
+ brw_inst *inst = brw_AND(p, brw_cr0_reg(0), brw_cr0_reg(0),
+ brw_imm_ud(~mask));
+ brw_inst_set_exec_size(p->devinfo, inst, BRW_EXECUTE_1);
+
+ /* From the Skylake PRM, Volume 7, page 760:
+ * "Implementation Restriction on Register Access: When the control
+ * register is used as an explicit source and/or destination, hardware
+ * does not ensure execution pipeline coherency. Software must set the
+ * thread control field to ‘switch’ for an instruction that uses
+ * control register as an explicit operand."
+ */
+ if (p->devinfo->gen < 12)
brw_inst_set_thread_control(p->devinfo, inst, BRW_THREAD_SWITCH);
- }
- if (bits) {
- brw_inst *inst = brw_OR(p, brw_cr0_reg(0), brw_cr0_reg(0),
- brw_imm_ud(bits));
- brw_inst_set_exec_size(p->devinfo, inst, BRW_EXECUTE_1);
- brw_inst_set_thread_control(p->devinfo, inst, BRW_THREAD_SWITCH);
+ if (mode) {
+ brw_inst *inst_or = brw_OR(p, brw_cr0_reg(0), brw_cr0_reg(0),
+ brw_imm_ud(mode));
+ brw_inst_set_exec_size(p->devinfo, inst_or, BRW_EXECUTE_1);
+ if (p->devinfo->gen < 12)
+ brw_inst_set_thread_control(p->devinfo, inst_or, BRW_THREAD_SWITCH);
}
}