X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fintel%2Fcompiler%2Fbrw_eu_emit.c;h=0d5c755f9e8d36e9ee1b7db70af4b2b63fc9aed6;hb=72dc06e07e3f8b9ed5bb46e3927b8f87dd24e42b;hp=4bd01a55cb70f71c8dc1b7bb54b318ab5d733646;hpb=d2d3e0450119d8c52a3dd0dd89bfc9182a1cfce7;p=mesa.git diff --git a/src/intel/compiler/brw_eu_emit.c b/src/intel/compiler/brw_eu_emit.c index 4bd01a55cb7..0d5c755f9e8 100644 --- a/src/intel/compiler/brw_eu_emit.c +++ b/src/intel/compiler/brw_eu_emit.c @@ -55,6 +55,7 @@ gen6_resolve_implied_move(struct brw_codegen *p, return; if (src->file != BRW_ARCHITECTURE_REGISTER_FILE || src->nr != BRW_ARF_NULL) { + assert(devinfo->gen < 12); brw_push_insn_state(p); brw_set_default_exec_size(p, BRW_EXECUTE_8); brw_set_default_mask_control(p, BRW_MASK_DISABLE); @@ -94,48 +95,89 @@ brw_set_dest(struct brw_codegen *p, brw_inst *inst, struct brw_reg dest) else if (dest.file == BRW_GENERAL_REGISTER_FILE) assert(dest.nr < 128); - gen7_convert_mrf_to_grf(p, &dest); + /* The hardware has a restriction where a destination of size Byte with + * a stride of 1 is only allowed for a packed byte MOV. For any other + * instruction, the stride must be at least 2, even when the destination + * is the NULL register. + */ + if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE && + dest.nr == BRW_ARF_NULL && + type_sz(dest.type) == 1 && + dest.hstride == BRW_HORIZONTAL_STRIDE_1) { + dest.hstride = BRW_HORIZONTAL_STRIDE_2; + } - brw_inst_set_dst_file_type(devinfo, inst, dest.file, dest.type); - brw_inst_set_dst_address_mode(devinfo, inst, dest.address_mode); + gen7_convert_mrf_to_grf(p, &dest); - if (dest.address_mode == BRW_ADDRESS_DIRECT) { + if (devinfo->gen >= 12 && + (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND || + brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDC)) { + assert(dest.file == BRW_GENERAL_REGISTER_FILE || + dest.file == BRW_ARCHITECTURE_REGISTER_FILE); + assert(dest.address_mode == BRW_ADDRESS_DIRECT); + assert(dest.subnr == 0); + assert(brw_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1 || + (dest.hstride == BRW_HORIZONTAL_STRIDE_1 && + dest.vstride == dest.width + 1)); + assert(!dest.negate && !dest.abs); + brw_inst_set_dst_reg_file(devinfo, inst, dest.file); brw_inst_set_dst_da_reg_nr(devinfo, inst, dest.nr); - if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { - brw_inst_set_dst_da1_subreg_nr(devinfo, inst, dest.subnr); - if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) - dest.hstride = BRW_HORIZONTAL_STRIDE_1; - brw_inst_set_dst_hstride(devinfo, inst, dest.hstride); - } else { - brw_inst_set_dst_da16_subreg_nr(devinfo, inst, dest.subnr / 16); - brw_inst_set_da16_writemask(devinfo, inst, dest.writemask); - if (dest.file == BRW_GENERAL_REGISTER_FILE || - dest.file == BRW_MESSAGE_REGISTER_FILE) { - assert(dest.writemask != 0); - } - /* From the Ivybridge PRM, Vol 4, Part 3, Section 5.2.4.1: - * Although Dst.HorzStride is a don't care for Align16, HW needs - * this to be programmed as "01". - */ - brw_inst_set_dst_hstride(devinfo, inst, 1); - } + } else if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDS || + brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDSC) { + assert(devinfo->gen < 12); + assert(dest.file == BRW_GENERAL_REGISTER_FILE || + dest.file == BRW_ARCHITECTURE_REGISTER_FILE); + assert(dest.address_mode == BRW_ADDRESS_DIRECT); + assert(dest.subnr % 16 == 0); + assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1 && + dest.vstride == dest.width + 1); + assert(!dest.negate && !dest.abs); + brw_inst_set_dst_da_reg_nr(devinfo, inst, dest.nr); + brw_inst_set_dst_da16_subreg_nr(devinfo, inst, dest.subnr / 16); + brw_inst_set_send_dst_reg_file(devinfo, inst, dest.file); } else { - brw_inst_set_dst_ia_subreg_nr(devinfo, inst, dest.subnr); + brw_inst_set_dst_file_type(devinfo, inst, dest.file, dest.type); + brw_inst_set_dst_address_mode(devinfo, inst, dest.address_mode); - /* These are different sizes in align1 vs align16: - */ - if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { - brw_inst_set_dst_ia1_addr_imm(devinfo, inst, - dest.indirect_offset); - if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) - dest.hstride = BRW_HORIZONTAL_STRIDE_1; - brw_inst_set_dst_hstride(devinfo, inst, dest.hstride); + if (dest.address_mode == BRW_ADDRESS_DIRECT) { + brw_inst_set_dst_da_reg_nr(devinfo, inst, dest.nr); + + if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { + brw_inst_set_dst_da1_subreg_nr(devinfo, inst, dest.subnr); + if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) + dest.hstride = BRW_HORIZONTAL_STRIDE_1; + brw_inst_set_dst_hstride(devinfo, inst, dest.hstride); + } else { + brw_inst_set_dst_da16_subreg_nr(devinfo, inst, dest.subnr / 16); + brw_inst_set_da16_writemask(devinfo, inst, dest.writemask); + if (dest.file == BRW_GENERAL_REGISTER_FILE || + dest.file == BRW_MESSAGE_REGISTER_FILE) { + assert(dest.writemask != 0); + } + /* From the Ivybridge PRM, Vol 4, Part 3, Section 5.2.4.1: + * Although Dst.HorzStride is a don't care for Align16, HW needs + * this to be programmed as "01". + */ + brw_inst_set_dst_hstride(devinfo, inst, 1); + } } else { - brw_inst_set_dst_ia16_addr_imm(devinfo, inst, - dest.indirect_offset); - /* even ignored in da16, still need to set as '01' */ - brw_inst_set_dst_hstride(devinfo, inst, 1); + brw_inst_set_dst_ia_subreg_nr(devinfo, inst, dest.subnr); + + /* These are different sizes in align1 vs align16: + */ + if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { + brw_inst_set_dst_ia1_addr_imm(devinfo, inst, + dest.indirect_offset); + if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) + dest.hstride = BRW_HORIZONTAL_STRIDE_1; + brw_inst_set_dst_hstride(devinfo, inst, dest.hstride); + } else { + brw_inst_set_dst_ia16_addr_imm(devinfo, inst, + dest.indirect_offset); + /* even ignored in da16, still need to set as '01' */ + brw_inst_set_dst_hstride(devinfo, inst, 1); + } } } @@ -175,8 +217,11 @@ brw_set_src0(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg) gen7_convert_mrf_to_grf(p, ®); - if (devinfo->gen >= 6 && (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND || - brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDC)) { + if (devinfo->gen >= 6 && + (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND || + brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDC || + brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDS || + brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDSC)) { /* Any source modifiers or regions will be ignored, since this just * identifies the MRF/GRF to start reading the message contents from. * Check for some likely failures. @@ -186,84 +231,110 @@ brw_set_src0(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg) assert(reg.address_mode == BRW_ADDRESS_DIRECT); } - brw_inst_set_src0_file_type(devinfo, inst, reg.file, reg.type); - brw_inst_set_src0_abs(devinfo, inst, reg.abs); - brw_inst_set_src0_negate(devinfo, inst, reg.negate); - brw_inst_set_src0_address_mode(devinfo, inst, reg.address_mode); - - if (reg.file == BRW_IMMEDIATE_VALUE) { - if (reg.type == BRW_REGISTER_TYPE_DF || - brw_inst_opcode(devinfo, inst) == BRW_OPCODE_DIM) - brw_inst_set_imm_df(devinfo, inst, reg.df); - else if (reg.type == BRW_REGISTER_TYPE_UQ || - reg.type == BRW_REGISTER_TYPE_Q) - brw_inst_set_imm_uq(devinfo, inst, reg.u64); - else - brw_inst_set_imm_ud(devinfo, inst, reg.ud); - - if (type_sz(reg.type) < 8) { - brw_inst_set_src1_reg_file(devinfo, inst, - BRW_ARCHITECTURE_REGISTER_FILE); - brw_inst_set_src1_reg_hw_type(devinfo, inst, - brw_inst_src0_reg_hw_type(devinfo, inst)); - } + if (devinfo->gen >= 12 && + (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND || + brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDC)) { + assert(reg.file != BRW_IMMEDIATE_VALUE); + assert(reg.address_mode == BRW_ADDRESS_DIRECT); + assert(reg.subnr == 0); + assert(has_scalar_region(reg) || + (reg.hstride == BRW_HORIZONTAL_STRIDE_1 && + reg.vstride == reg.width + 1)); + assert(!reg.negate && !reg.abs); + brw_inst_set_send_src0_reg_file(devinfo, inst, reg.file); + brw_inst_set_src0_da_reg_nr(devinfo, inst, reg.nr); + + } else if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDS || + brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDSC) { + assert(reg.file == BRW_GENERAL_REGISTER_FILE); + assert(reg.address_mode == BRW_ADDRESS_DIRECT); + assert(reg.subnr % 16 == 0); + assert(has_scalar_region(reg) || + (reg.hstride == BRW_HORIZONTAL_STRIDE_1 && + reg.vstride == reg.width + 1)); + assert(!reg.negate && !reg.abs); + brw_inst_set_src0_da_reg_nr(devinfo, inst, reg.nr); + brw_inst_set_src0_da16_subreg_nr(devinfo, inst, reg.subnr / 16); } else { - if (reg.address_mode == BRW_ADDRESS_DIRECT) { - brw_inst_set_src0_da_reg_nr(devinfo, inst, reg.nr); - if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { - brw_inst_set_src0_da1_subreg_nr(devinfo, inst, reg.subnr); - } else { - brw_inst_set_src0_da16_subreg_nr(devinfo, inst, reg.subnr / 16); - } + brw_inst_set_src0_file_type(devinfo, inst, reg.file, reg.type); + brw_inst_set_src0_abs(devinfo, inst, reg.abs); + brw_inst_set_src0_negate(devinfo, inst, reg.negate); + brw_inst_set_src0_address_mode(devinfo, inst, reg.address_mode); + + if (reg.file == BRW_IMMEDIATE_VALUE) { + if (reg.type == BRW_REGISTER_TYPE_DF || + brw_inst_opcode(devinfo, inst) == BRW_OPCODE_DIM) + brw_inst_set_imm_df(devinfo, inst, reg.df); + else if (reg.type == BRW_REGISTER_TYPE_UQ || + reg.type == BRW_REGISTER_TYPE_Q) + brw_inst_set_imm_uq(devinfo, inst, reg.u64); + else + brw_inst_set_imm_ud(devinfo, inst, reg.ud); + + if (devinfo->gen < 12 && type_sz(reg.type) < 8) { + brw_inst_set_src1_reg_file(devinfo, inst, + BRW_ARCHITECTURE_REGISTER_FILE); + brw_inst_set_src1_reg_hw_type(devinfo, inst, + brw_inst_src0_reg_hw_type(devinfo, inst)); + } } else { - brw_inst_set_src0_ia_subreg_nr(devinfo, inst, reg.subnr); + if (reg.address_mode == BRW_ADDRESS_DIRECT) { + brw_inst_set_src0_da_reg_nr(devinfo, inst, reg.nr); + if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { + brw_inst_set_src0_da1_subreg_nr(devinfo, inst, reg.subnr); + } else { + brw_inst_set_src0_da16_subreg_nr(devinfo, inst, reg.subnr / 16); + } + } else { + brw_inst_set_src0_ia_subreg_nr(devinfo, inst, reg.subnr); - if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { - brw_inst_set_src0_ia1_addr_imm(devinfo, inst, reg.indirect_offset); - } else { - brw_inst_set_src0_ia16_addr_imm(devinfo, inst, reg.indirect_offset); - } - } + if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { + brw_inst_set_src0_ia1_addr_imm(devinfo, inst, reg.indirect_offset); + } else { + brw_inst_set_src0_ia16_addr_imm(devinfo, inst, reg.indirect_offset); + } + } - if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { - if (reg.width == BRW_WIDTH_1 && - brw_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1) { - brw_inst_set_src0_hstride(devinfo, inst, BRW_HORIZONTAL_STRIDE_0); - brw_inst_set_src0_width(devinfo, inst, BRW_WIDTH_1); - brw_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_0); - } else { - brw_inst_set_src0_hstride(devinfo, inst, reg.hstride); - brw_inst_set_src0_width(devinfo, inst, reg.width); - brw_inst_set_src0_vstride(devinfo, inst, reg.vstride); - } - } else { - brw_inst_set_src0_da16_swiz_x(devinfo, inst, - BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_X)); - brw_inst_set_src0_da16_swiz_y(devinfo, inst, - BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Y)); - brw_inst_set_src0_da16_swiz_z(devinfo, inst, - BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Z)); - brw_inst_set_src0_da16_swiz_w(devinfo, inst, - BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_W)); - - if (reg.vstride == BRW_VERTICAL_STRIDE_8) { - /* This is an oddity of the fact we're using the same - * descriptions for registers in align_16 as align_1: - */ - brw_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4); - } else if (devinfo->gen == 7 && !devinfo->is_haswell && - reg.type == BRW_REGISTER_TYPE_DF && - reg.vstride == BRW_VERTICAL_STRIDE_2) { - /* From SNB PRM: - * - * "For Align16 access mode, only encodings of 0000 and 0011 - * are allowed. Other codes are reserved." - * - * Presumably the DevSNB behavior applies to IVB as well. - */ - brw_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4); + if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { + if (reg.width == BRW_WIDTH_1 && + brw_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1) { + brw_inst_set_src0_hstride(devinfo, inst, BRW_HORIZONTAL_STRIDE_0); + brw_inst_set_src0_width(devinfo, inst, BRW_WIDTH_1); + brw_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_0); + } else { + brw_inst_set_src0_hstride(devinfo, inst, reg.hstride); + brw_inst_set_src0_width(devinfo, inst, reg.width); + brw_inst_set_src0_vstride(devinfo, inst, reg.vstride); + } } else { - brw_inst_set_src0_vstride(devinfo, inst, reg.vstride); + brw_inst_set_src0_da16_swiz_x(devinfo, inst, + BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_X)); + brw_inst_set_src0_da16_swiz_y(devinfo, inst, + BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Y)); + brw_inst_set_src0_da16_swiz_z(devinfo, inst, + BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Z)); + brw_inst_set_src0_da16_swiz_w(devinfo, inst, + BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_W)); + + if (reg.vstride == BRW_VERTICAL_STRIDE_8) { + /* This is an oddity of the fact we're using the same + * descriptions for registers in align_16 as align_1: + */ + brw_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4); + } else if (devinfo->gen == 7 && !devinfo->is_haswell && + reg.type == BRW_REGISTER_TYPE_DF && + reg.vstride == BRW_VERTICAL_STRIDE_2) { + /* From SNB PRM: + * + * "For Align16 access mode, only encodings of 0000 and 0011 + * are allowed. Other codes are reserved." + * + * Presumably the DevSNB behavior applies to IVB as well. + */ + brw_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4); + } else { + brw_inst_set_src0_vstride(devinfo, inst, reg.vstride); + } } } } @@ -278,82 +349,99 @@ brw_set_src1(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg) if (reg.file == BRW_GENERAL_REGISTER_FILE) assert(reg.nr < 128); - /* From the IVB PRM Vol. 4, Pt. 3, Section 3.3.3.5: - * - * "Accumulator registers may be accessed explicitly as src0 - * operands only." - */ - assert(reg.file != BRW_ARCHITECTURE_REGISTER_FILE || - reg.nr != BRW_ARF_ACCUMULATOR); - - gen7_convert_mrf_to_grf(p, ®); - assert(reg.file != BRW_MESSAGE_REGISTER_FILE); + if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDS || + brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDSC || + (devinfo->gen >= 12 && + (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND || + brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDC))) { + assert(reg.file == BRW_GENERAL_REGISTER_FILE || + reg.file == BRW_ARCHITECTURE_REGISTER_FILE); + assert(reg.address_mode == BRW_ADDRESS_DIRECT); + assert(reg.subnr == 0); + assert(has_scalar_region(reg) || + (reg.hstride == BRW_HORIZONTAL_STRIDE_1 && + reg.vstride == reg.width + 1)); + assert(!reg.negate && !reg.abs); + brw_inst_set_send_src1_reg_nr(devinfo, inst, reg.nr); + brw_inst_set_send_src1_reg_file(devinfo, inst, reg.file); + } else { + /* From the IVB PRM Vol. 4, Pt. 3, Section 3.3.3.5: + * + * "Accumulator registers may be accessed explicitly as src0 + * operands only." + */ + assert(reg.file != BRW_ARCHITECTURE_REGISTER_FILE || + reg.nr != BRW_ARF_ACCUMULATOR); - brw_inst_set_src1_file_type(devinfo, inst, reg.file, reg.type); - brw_inst_set_src1_abs(devinfo, inst, reg.abs); - brw_inst_set_src1_negate(devinfo, inst, reg.negate); + gen7_convert_mrf_to_grf(p, ®); + assert(reg.file != BRW_MESSAGE_REGISTER_FILE); - /* Only src1 can be immediate in two-argument instructions. - */ - assert(brw_inst_src0_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE); + brw_inst_set_src1_file_type(devinfo, inst, reg.file, reg.type); + brw_inst_set_src1_abs(devinfo, inst, reg.abs); + brw_inst_set_src1_negate(devinfo, inst, reg.negate); - if (reg.file == BRW_IMMEDIATE_VALUE) { - /* two-argument instructions can only use 32-bit immediates */ - assert(type_sz(reg.type) < 8); - brw_inst_set_imm_ud(devinfo, inst, reg.ud); - } else { - /* This is a hardware restriction, which may or may not be lifted - * in the future: + /* Only src1 can be immediate in two-argument instructions. */ - assert (reg.address_mode == BRW_ADDRESS_DIRECT); - /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */ + assert(brw_inst_src0_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE); - brw_inst_set_src1_da_reg_nr(devinfo, inst, reg.nr); - if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { - brw_inst_set_src1_da1_subreg_nr(devinfo, inst, reg.subnr); + if (reg.file == BRW_IMMEDIATE_VALUE) { + /* two-argument instructions can only use 32-bit immediates */ + assert(type_sz(reg.type) < 8); + brw_inst_set_imm_ud(devinfo, inst, reg.ud); } else { - brw_inst_set_src1_da16_subreg_nr(devinfo, inst, reg.subnr / 16); - } + /* This is a hardware restriction, which may or may not be lifted + * in the future: + */ + assert (reg.address_mode == BRW_ADDRESS_DIRECT); + /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */ - if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { - if (reg.width == BRW_WIDTH_1 && - brw_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1) { - brw_inst_set_src1_hstride(devinfo, inst, BRW_HORIZONTAL_STRIDE_0); - brw_inst_set_src1_width(devinfo, inst, BRW_WIDTH_1); - brw_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_0); - } else { - brw_inst_set_src1_hstride(devinfo, inst, reg.hstride); - brw_inst_set_src1_width(devinfo, inst, reg.width); - brw_inst_set_src1_vstride(devinfo, inst, reg.vstride); - } - } else { - brw_inst_set_src1_da16_swiz_x(devinfo, inst, - BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_X)); - brw_inst_set_src1_da16_swiz_y(devinfo, inst, - BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Y)); - brw_inst_set_src1_da16_swiz_z(devinfo, inst, - BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Z)); - brw_inst_set_src1_da16_swiz_w(devinfo, inst, - BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_W)); - - if (reg.vstride == BRW_VERTICAL_STRIDE_8) { - /* This is an oddity of the fact we're using the same - * descriptions for registers in align_16 as align_1: - */ - brw_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4); - } else if (devinfo->gen == 7 && !devinfo->is_haswell && - reg.type == BRW_REGISTER_TYPE_DF && - reg.vstride == BRW_VERTICAL_STRIDE_2) { - /* From SNB PRM: - * - * "For Align16 access mode, only encodings of 0000 and 0011 - * are allowed. Other codes are reserved." - * - * Presumably the DevSNB behavior applies to IVB as well. - */ - brw_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4); + brw_inst_set_src1_da_reg_nr(devinfo, inst, reg.nr); + if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { + brw_inst_set_src1_da1_subreg_nr(devinfo, inst, reg.subnr); + } else { + brw_inst_set_src1_da16_subreg_nr(devinfo, inst, reg.subnr / 16); + } + + if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { + if (reg.width == BRW_WIDTH_1 && + brw_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1) { + brw_inst_set_src1_hstride(devinfo, inst, BRW_HORIZONTAL_STRIDE_0); + brw_inst_set_src1_width(devinfo, inst, BRW_WIDTH_1); + brw_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_0); + } else { + brw_inst_set_src1_hstride(devinfo, inst, reg.hstride); + brw_inst_set_src1_width(devinfo, inst, reg.width); + brw_inst_set_src1_vstride(devinfo, inst, reg.vstride); + } } else { - brw_inst_set_src1_vstride(devinfo, inst, reg.vstride); + brw_inst_set_src1_da16_swiz_x(devinfo, inst, + BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_X)); + brw_inst_set_src1_da16_swiz_y(devinfo, inst, + BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Y)); + brw_inst_set_src1_da16_swiz_z(devinfo, inst, + BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Z)); + brw_inst_set_src1_da16_swiz_w(devinfo, inst, + BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_W)); + + if (reg.vstride == BRW_VERTICAL_STRIDE_8) { + /* This is an oddity of the fact we're using the same + * descriptions for registers in align_16 as align_1: + */ + brw_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4); + } else if (devinfo->gen == 7 && !devinfo->is_haswell && + reg.type == BRW_REGISTER_TYPE_DF && + reg.vstride == BRW_VERTICAL_STRIDE_2) { + /* From SNB PRM: + * + * "For Align16 access mode, only encodings of 0000 and 0011 + * are allowed. Other codes are reserved." + * + * Presumably the DevSNB behavior applies to IVB as well. + */ + brw_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4); + } else { + brw_inst_set_src1_vstride(devinfo, inst, reg.vstride); + } } } } @@ -370,8 +458,9 @@ brw_set_desc_ex(struct brw_codegen *p, brw_inst *inst, const struct gen_device_info *devinfo = p->devinfo; assert(brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND || brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDC); - brw_inst_set_src1_file_type(devinfo, inst, - BRW_IMMEDIATE_VALUE, BRW_REGISTER_TYPE_UD); + if (devinfo->gen < 12) + brw_inst_set_src1_file_type(devinfo, inst, + BRW_IMMEDIATE_VALUE, BRW_REGISTER_TYPE_UD); brw_inst_set_send_desc(devinfo, inst, desc); if (devinfo->gen >= 9) brw_inst_set_send_ex_desc(devinfo, inst, ex_desc); @@ -505,7 +594,7 @@ gen7_set_dp_scratch_message(struct brw_codegen *p, const struct gen_device_info *devinfo = p->devinfo; assert(num_regs == 1 || num_regs == 2 || num_regs == 4 || (devinfo->gen >= 8 && num_regs == 8)); - const unsigned block_size = (devinfo->gen >= 8 ? _mesa_logbase2(num_regs) : + const unsigned block_size = (devinfo->gen >= 8 ? util_logbase2(num_regs) : num_regs - 1); brw_set_desc(p, inst, brw_message_desc( @@ -530,6 +619,8 @@ brw_inst_set_state(const struct gen_device_info *devinfo, brw_inst_set_compression(devinfo, insn, state->compressed); brw_inst_set_access_mode(devinfo, insn, state->access_mode); brw_inst_set_mask_control(devinfo, insn, state->mask_control); + if (devinfo->gen >= 12) + brw_inst_set_swsb(devinfo, insn, tgl_swsb_encode(state->swsb)); brw_inst_set_saturate(devinfo, insn, state->saturate); brw_inst_set_pred_control(devinfo, insn, state->predicate); brw_inst_set_pred_inv(devinfo, insn, state->pred_inv); @@ -609,12 +700,17 @@ get_3src_subreg_nr(struct brw_reg reg) } static enum gen10_align1_3src_vertical_stride -to_3src_align1_vstride(enum brw_vertical_stride vstride) +to_3src_align1_vstride(const struct gen_device_info *devinfo, + enum brw_vertical_stride vstride) { switch (vstride) { case BRW_VERTICAL_STRIDE_0: return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_0; + case BRW_VERTICAL_STRIDE_1: + assert(devinfo->gen >= 12); + return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_1; case BRW_VERTICAL_STRIDE_2: + assert(devinfo->gen < 12); return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_2; case BRW_VERTICAL_STRIDE_4: return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_4; @@ -654,9 +750,14 @@ brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest, gen7_convert_mrf_to_grf(p, &dest); assert(dest.nr < 128); - assert(src0.file != BRW_IMMEDIATE_VALUE || src0.nr < 128); - assert(src1.file != BRW_IMMEDIATE_VALUE || src1.nr < 128); - assert(src2.file != BRW_IMMEDIATE_VALUE || src2.nr < 128); + + if (devinfo->gen >= 10) + assert(!(src0.file == BRW_IMMEDIATE_VALUE && + src2.file == BRW_IMMEDIATE_VALUE)); + + assert(src0.file == BRW_IMMEDIATE_VALUE || src0.nr < 128); + assert(src1.file != BRW_IMMEDIATE_VALUE && src1.nr < 128); + assert(src2.file == BRW_IMMEDIATE_VALUE || src2.nr < 128); assert(dest.address_mode == BRW_ADDRESS_DIRECT); assert(src0.address_mode == BRW_ADDRESS_DIRECT); assert(src1.address_mode == BRW_ADDRESS_DIRECT); @@ -666,14 +767,19 @@ brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest, assert(dest.file == BRW_GENERAL_REGISTER_FILE || dest.file == BRW_ARCHITECTURE_REGISTER_FILE); - if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE) { - brw_inst_set_3src_a1_dst_reg_file(devinfo, inst, - BRW_ALIGN1_3SRC_ACCUMULATOR); - brw_inst_set_3src_dst_reg_nr(devinfo, inst, BRW_ARF_ACCUMULATOR); - } else { - brw_inst_set_3src_a1_dst_reg_file(devinfo, inst, - BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE); + if (devinfo->gen >= 12) { + brw_inst_set_3src_a1_dst_reg_file(devinfo, inst, dest.file); brw_inst_set_3src_dst_reg_nr(devinfo, inst, dest.nr); + } else { + if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE) { + brw_inst_set_3src_a1_dst_reg_file(devinfo, inst, + BRW_ALIGN1_3SRC_ACCUMULATOR); + brw_inst_set_3src_dst_reg_nr(devinfo, inst, BRW_ARF_ACCUMULATOR); + } else { + brw_inst_set_3src_a1_dst_reg_file(devinfo, inst, + BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE); + brw_inst_set_3src_dst_reg_nr(devinfo, inst, dest.nr); + } } brw_inst_set_3src_a1_dst_subreg_nr(devinfo, inst, dest.subnr / 8); @@ -692,27 +798,26 @@ brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest, brw_inst_set_3src_a1_src1_type(devinfo, inst, src1.type); brw_inst_set_3src_a1_src2_type(devinfo, inst, src2.type); - brw_inst_set_3src_a1_src0_vstride(devinfo, inst, - to_3src_align1_vstride(src0.vstride)); - brw_inst_set_3src_a1_src1_vstride(devinfo, inst, - to_3src_align1_vstride(src1.vstride)); - /* no vstride on src2 */ - - brw_inst_set_3src_a1_src0_hstride(devinfo, inst, - to_3src_align1_hstride(src0.hstride)); - brw_inst_set_3src_a1_src1_hstride(devinfo, inst, - to_3src_align1_hstride(src1.hstride)); - brw_inst_set_3src_a1_src2_hstride(devinfo, inst, - to_3src_align1_hstride(src2.hstride)); - - brw_inst_set_3src_a1_src0_subreg_nr(devinfo, inst, src0.subnr); - if (src0.type == BRW_REGISTER_TYPE_NF) { - brw_inst_set_3src_src0_reg_nr(devinfo, inst, BRW_ARF_ACCUMULATOR); + if (src0.file == BRW_IMMEDIATE_VALUE) { + brw_inst_set_3src_a1_src0_imm(devinfo, inst, src0.ud); } else { - brw_inst_set_3src_src0_reg_nr(devinfo, inst, src0.nr); + brw_inst_set_3src_a1_src0_vstride( + devinfo, inst, to_3src_align1_vstride(devinfo, src0.vstride)); + brw_inst_set_3src_a1_src0_hstride(devinfo, inst, + to_3src_align1_hstride(src0.hstride)); + brw_inst_set_3src_a1_src0_subreg_nr(devinfo, inst, src0.subnr); + if (src0.type == BRW_REGISTER_TYPE_NF) { + brw_inst_set_3src_src0_reg_nr(devinfo, inst, BRW_ARF_ACCUMULATOR); + } else { + brw_inst_set_3src_src0_reg_nr(devinfo, inst, src0.nr); + } + brw_inst_set_3src_src0_abs(devinfo, inst, src0.abs); + brw_inst_set_3src_src0_negate(devinfo, inst, src0.negate); } - brw_inst_set_3src_src0_abs(devinfo, inst, src0.abs); - brw_inst_set_3src_src0_negate(devinfo, inst, src0.negate); + brw_inst_set_3src_a1_src1_vstride( + devinfo, inst, to_3src_align1_vstride(devinfo, src1.vstride)); + brw_inst_set_3src_a1_src1_hstride(devinfo, inst, + to_3src_align1_hstride(src1.hstride)); brw_inst_set_3src_a1_src1_subreg_nr(devinfo, inst, src1.subnr); if (src1.file == BRW_ARCHITECTURE_REGISTER_FILE) { @@ -723,10 +828,17 @@ brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest, brw_inst_set_3src_src1_abs(devinfo, inst, src1.abs); brw_inst_set_3src_src1_negate(devinfo, inst, src1.negate); - brw_inst_set_3src_a1_src2_subreg_nr(devinfo, inst, src2.subnr); - brw_inst_set_3src_src2_reg_nr(devinfo, inst, src2.nr); - brw_inst_set_3src_src2_abs(devinfo, inst, src2.abs); - brw_inst_set_3src_src2_negate(devinfo, inst, src2.negate); + if (src2.file == BRW_IMMEDIATE_VALUE) { + brw_inst_set_3src_a1_src2_imm(devinfo, inst, src2.ud); + } else { + brw_inst_set_3src_a1_src2_hstride(devinfo, inst, + to_3src_align1_hstride(src2.hstride)); + /* no vstride on src2 */ + brw_inst_set_3src_a1_src2_subreg_nr(devinfo, inst, src2.subnr); + brw_inst_set_3src_src2_reg_nr(devinfo, inst, src2.nr); + brw_inst_set_3src_src2_abs(devinfo, inst, src2.abs); + brw_inst_set_3src_src2_negate(devinfo, inst, src2.negate); + } assert(src0.file == BRW_GENERAL_REGISTER_FILE || src0.file == BRW_IMMEDIATE_VALUE || @@ -737,31 +849,49 @@ brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest, assert(src2.file == BRW_GENERAL_REGISTER_FILE || src2.file == BRW_IMMEDIATE_VALUE); - brw_inst_set_3src_a1_src0_reg_file(devinfo, inst, - src0.file == BRW_GENERAL_REGISTER_FILE ? - BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE : - BRW_ALIGN1_3SRC_IMMEDIATE_VALUE); - brw_inst_set_3src_a1_src1_reg_file(devinfo, inst, - src1.file == BRW_GENERAL_REGISTER_FILE ? - BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE : - BRW_ALIGN1_3SRC_ACCUMULATOR); - brw_inst_set_3src_a1_src2_reg_file(devinfo, inst, - src2.file == BRW_GENERAL_REGISTER_FILE ? - BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE : - BRW_ALIGN1_3SRC_IMMEDIATE_VALUE); + if (devinfo->gen >= 12) { + if (src0.file == BRW_IMMEDIATE_VALUE) { + brw_inst_set_3src_a1_src0_is_imm(devinfo, inst, 1); + } else { + brw_inst_set_3src_a1_src0_reg_file(devinfo, inst, src0.file); + } + + brw_inst_set_3src_a1_src1_reg_file(devinfo, inst, src1.file); + + if (src2.file == BRW_IMMEDIATE_VALUE) { + brw_inst_set_3src_a1_src2_is_imm(devinfo, inst, 1); + } else { + brw_inst_set_3src_a1_src2_reg_file(devinfo, inst, src2.file); + } + } else { + brw_inst_set_3src_a1_src0_reg_file(devinfo, inst, + src0.file == BRW_GENERAL_REGISTER_FILE ? + BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE : + BRW_ALIGN1_3SRC_IMMEDIATE_VALUE); + brw_inst_set_3src_a1_src1_reg_file(devinfo, inst, + src1.file == BRW_GENERAL_REGISTER_FILE ? + BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE : + BRW_ALIGN1_3SRC_ACCUMULATOR); + brw_inst_set_3src_a1_src2_reg_file(devinfo, inst, + src2.file == BRW_GENERAL_REGISTER_FILE ? + BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE : + BRW_ALIGN1_3SRC_IMMEDIATE_VALUE); + } + } else { assert(dest.file == BRW_GENERAL_REGISTER_FILE || dest.file == BRW_MESSAGE_REGISTER_FILE); assert(dest.type == BRW_REGISTER_TYPE_F || dest.type == BRW_REGISTER_TYPE_DF || dest.type == BRW_REGISTER_TYPE_D || - dest.type == BRW_REGISTER_TYPE_UD); + dest.type == BRW_REGISTER_TYPE_UD || + (dest.type == BRW_REGISTER_TYPE_HF && devinfo->gen >= 8)); if (devinfo->gen == 6) { brw_inst_set_3src_a16_dst_reg_file(devinfo, inst, dest.file == BRW_MESSAGE_REGISTER_FILE); } brw_inst_set_3src_dst_reg_nr(devinfo, inst, dest.nr); - brw_inst_set_3src_a16_dst_subreg_nr(devinfo, inst, dest.subnr / 16); + brw_inst_set_3src_a16_dst_subreg_nr(devinfo, inst, dest.subnr / 4); brw_inst_set_3src_a16_dst_writemask(devinfo, inst, dest.writemask); assert(src0.file == BRW_GENERAL_REGISTER_FILE); @@ -800,6 +930,22 @@ brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest, */ brw_inst_set_3src_a16_src_type(devinfo, inst, dest.type); brw_inst_set_3src_a16_dst_type(devinfo, inst, dest.type); + + /* From the Bspec, 3D Media GPGPU, Instruction fields, srcType: + * + * "Three source instructions can use operands with mixed-mode + * precision. When SrcType field is set to :f or :hf it defines + * precision for source 0 only, and fields Src1Type and Src2Type + * define precision for other source operands: + * + * 0b = :f. Single precision Float (32-bit). + * 1b = :hf. Half precision Float (16-bit)." + */ + if (src1.type == BRW_REGISTER_TYPE_HF) + brw_inst_set_3src_a16_src1_type(devinfo, inst, 1); + + if (src2.type == BRW_REGISTER_TYPE_HF) + brw_inst_set_3src_a16_src2_type(devinfo, inst, 1); } } @@ -875,33 +1021,6 @@ brw_inst *brw_##OP(struct brw_codegen *p, \ return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2); \ } -/* Rounding operations (other than RNDD) require two instructions - the first - * stores a rounded value (possibly the wrong way) in the dest register, but - * also sets a per-channel "increment bit" in the flag register. A predicated - * add of 1.0 fixes dest to contain the desired result. - * - * Sandybridge and later appear to round correctly without an ADD. - */ -#define ROUND(OP) \ -void brw_##OP(struct brw_codegen *p, \ - struct brw_reg dest, \ - struct brw_reg src) \ -{ \ - const struct gen_device_info *devinfo = p->devinfo; \ - brw_inst *rnd, *add; \ - rnd = next_insn(p, BRW_OPCODE_##OP); \ - brw_set_dest(p, rnd, dest); \ - brw_set_src0(p, rnd, src); \ - \ - if (devinfo->gen < 6) { \ - /* turn on round-increments */ \ - brw_inst_set_cond_modifier(devinfo, rnd, BRW_CONDITIONAL_R); \ - add = brw_ADD(p, dest, dest, brw_imm_f(1.0f)); \ - brw_inst_set_pred_control(devinfo, add, BRW_PREDICATE_NORMAL); \ - } \ -} - - ALU2(SEL) ALU1(NOT) ALU2(AND) @@ -911,9 +1030,14 @@ ALU2(SHR) ALU2(SHL) ALU1(DIM) ALU2(ASR) +ALU2(ROL) +ALU2(ROR) ALU3(CSEL) ALU1(FRC) ALU1(RNDD) +ALU1(RNDE) +ALU1(RNDU) +ALU1(RNDZ) ALU2(MAC) ALU2(MACH) ALU1(LZD) @@ -933,9 +1057,6 @@ ALU1(CBIT) ALU2(ADDC) ALU2(SUBB) -ROUND(RNDZ) -ROUND(RNDE) - brw_inst * brw_MOV(struct brw_codegen *p, struct brw_reg dest, struct brw_reg src0) { @@ -1099,9 +1220,12 @@ brw_F32TO16(struct brw_codegen *p, struct brw_reg dst, struct brw_reg src) } if (needs_zero_fill) { - brw_inst_set_no_dd_clear(devinfo, inst, true); + if (devinfo->gen < 12) + brw_inst_set_no_dd_clear(devinfo, inst, true); + brw_set_default_swsb(p, tgl_swsb_null()); inst = brw_MOV(p, suboffset(dst, 1), brw_imm_w(0)); - brw_inst_set_no_dd_check(devinfo, inst, true); + if (devinfo->gen < 12) + brw_inst_set_no_dd_check(devinfo, inst, true); } brw_pop_insn_state(p); @@ -1147,9 +1271,11 @@ void brw_NOP(struct brw_codegen *p) brw_inst_set_opcode(p->devinfo, insn, BRW_OPCODE_NOP); } - - - +void brw_SYNC(struct brw_codegen *p, enum tgl_sync_function func) +{ + brw_inst *insn = next_insn(p, BRW_OPCODE_SYNC); + brw_inst_set_cond_modifier(p->devinfo, insn, func); +} /*********************************************************************** * Comparisons, if/else/endif @@ -1253,7 +1379,8 @@ brw_IF(struct brw_codegen *p, unsigned execute_size) brw_inst_set_uip(devinfo, insn, 0); } else { brw_set_dest(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D))); - brw_set_src0(p, insn, brw_imm_d(0)); + if (devinfo->gen < 12) + brw_set_src0(p, insn, brw_imm_d(0)); brw_inst_set_jip(devinfo, insn, 0); brw_inst_set_uip(devinfo, insn, 0); } @@ -1453,7 +1580,8 @@ brw_ELSE(struct brw_codegen *p) brw_inst_set_uip(devinfo, insn, 0); } else { brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); - brw_set_src0(p, insn, brw_imm_d(0)); + if (devinfo->gen < 12) + brw_set_src0(p, insn, brw_imm_d(0)); brw_inst_set_jip(devinfo, insn, 0); brw_inst_set_uip(devinfo, insn, 0); } @@ -1599,18 +1727,27 @@ brw_CONT(struct brw_codegen *p) } brw_inst * -gen6_HALT(struct brw_codegen *p) +brw_HALT(struct brw_codegen *p) { const struct gen_device_info *devinfo = p->devinfo; brw_inst *insn; insn = next_insn(p, BRW_OPCODE_HALT); brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); - if (devinfo->gen >= 8) { - brw_set_src0(p, insn, brw_imm_d(0x0)); - } else { + if (devinfo->gen < 6) { + /* From the Gen4 PRM: + * + * "IP register must be put (for example, by the assembler) at + * and locations. + */ + brw_set_dest(p, insn, brw_ip_reg()); + brw_set_src0(p, insn, brw_ip_reg()); + brw_set_src1(p, insn, brw_imm_d(0x0)); /* exitcode updated later. */ + } else if (devinfo->gen < 8) { brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); brw_set_src1(p, insn, brw_imm_d(0x0)); /* UIP and JIP, updated later. */ + } else if (devinfo->gen < 12) { + brw_set_src0(p, insn, brw_imm_d(0x0)); } brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE); @@ -1706,7 +1843,8 @@ brw_WHILE(struct brw_codegen *p) if (devinfo->gen >= 8) { brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); - brw_set_src0(p, insn, brw_imm_d(0)); + if (devinfo->gen < 12) + brw_set_src0(p, insn, brw_imm_d(0)); brw_inst_set_jip(devinfo, insn, br * (do_insn - insn)); } else if (devinfo->gen == 7) { brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); @@ -1874,8 +2012,10 @@ void gen6_math(struct brw_codegen *p, assert(src1.file == BRW_GENERAL_REGISTER_FILE || (devinfo->gen >= 8 && src1.file == BRW_IMMEDIATE_VALUE)); } else { - assert(src0.type == BRW_REGISTER_TYPE_F); - assert(src1.type == BRW_REGISTER_TYPE_F); + assert(src0.type == BRW_REGISTER_TYPE_F || + (src0.type == BRW_REGISTER_TYPE_HF && devinfo->gen >= 9)); + assert(src1.type == BRW_REGISTER_TYPE_F || + (src1.type == BRW_REGISTER_TYPE_HF && devinfo->gen >= 9)); } /* Source modifiers are ignored for extended math instructions on Gen6. */ @@ -1924,6 +2064,7 @@ void brw_oword_block_write_scratch(struct brw_codegen *p, (devinfo->gen >= 7 ? GEN7_SFID_DATAPORT_DATA_CACHE : devinfo->gen >= 6 ? GEN6_SFID_DATAPORT_RENDER_CACHE : BRW_SFID_DATAPORT_WRITE); + const struct tgl_swsb swsb = brw_get_default_swsb(p); uint32_t msg_type; if (devinfo->gen >= 6) @@ -1943,11 +2084,13 @@ void brw_oword_block_write_scratch(struct brw_codegen *p, brw_set_default_exec_size(p, BRW_EXECUTE_8); brw_set_default_mask_control(p, BRW_MASK_DISABLE); brw_set_default_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_default_swsb(p, tgl_swsb_src_dep(swsb)); brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)); /* set message header global offset field (reg 0, element 2) */ brw_set_default_exec_size(p, BRW_EXECUTE_1); + brw_set_default_swsb(p, tgl_swsb_null()); brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, mrf.nr, @@ -1955,6 +2098,7 @@ void brw_oword_block_write_scratch(struct brw_codegen *p, brw_imm_ud(offset)); brw_pop_insn_state(p); + brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1)); } { @@ -2029,6 +2173,7 @@ brw_oword_block_read_scratch(struct brw_codegen *p, unsigned offset) { const struct gen_device_info *devinfo = p->devinfo; + const struct tgl_swsb swsb = brw_get_default_swsb(p); if (devinfo->gen >= 6) offset /= 16; @@ -2055,6 +2200,7 @@ brw_oword_block_read_scratch(struct brw_codegen *p, { brw_push_insn_state(p); + brw_set_default_swsb(p, tgl_swsb_src_dep(swsb)); brw_set_default_exec_size(p, BRW_EXECUTE_8); brw_set_default_compression_control(p, BRW_COMPRESSION_NONE); brw_set_default_mask_control(p, BRW_MASK_DISABLE); @@ -2063,9 +2209,11 @@ brw_oword_block_read_scratch(struct brw_codegen *p, /* set message header global offset field (reg 0, element 2) */ brw_set_default_exec_size(p, BRW_EXECUTE_1); + brw_set_default_swsb(p, tgl_swsb_null()); brw_MOV(p, get_element_ud(mrf, 2), brw_imm_ud(offset)); brw_pop_insn_state(p); + brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1)); } { @@ -2142,6 +2290,7 @@ void brw_oword_block_read(struct brw_codegen *p, (devinfo->gen >= 6 ? GEN6_SFID_DATAPORT_CONSTANT_CACHE : BRW_SFID_DATAPORT_READ); const unsigned exec_size = 1 << brw_get_default_exec_size(p); + const struct tgl_swsb swsb = brw_get_default_swsb(p); /* On newer hardware, offset is in units of owords. */ if (devinfo->gen >= 6) @@ -2156,10 +2305,12 @@ void brw_oword_block_read(struct brw_codegen *p, brw_push_insn_state(p); brw_set_default_exec_size(p, BRW_EXECUTE_8); + brw_set_default_swsb(p, tgl_swsb_src_dep(swsb)); brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)); /* set message header global offset field (reg 0, element 2) */ brw_set_default_exec_size(p, BRW_EXECUTE_1); + brw_set_default_swsb(p, tgl_swsb_null()); brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, mrf.nr, @@ -2167,6 +2318,8 @@ void brw_oword_block_read(struct brw_codegen *p, brw_imm_ud(offset)); brw_pop_insn_state(p); + brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1)); + brw_inst *insn = next_insn(p, BRW_OPCODE_SEND); brw_inst_set_sfid(devinfo, insn, target_cache); @@ -2372,12 +2525,15 @@ void brw_adjust_sampler_state_pointer(struct brw_codegen *p, struct brw_reg temp = get_element_ud(header, 3); + brw_push_insn_state(p); brw_AND(p, temp, get_element_ud(sampler_index, 0), brw_imm_ud(0x0f0)); + brw_set_default_swsb(p, tgl_swsb_regdist(1)); brw_SHL(p, temp, temp, brw_imm_ud(4)); brw_ADD(p, get_element_ud(header, 3), get_element_ud(brw_vec8_grf(0, 0), 3), temp); + brw_pop_insn_state(p); } } @@ -2439,7 +2595,8 @@ brw_send_indirect_message(struct brw_codegen *p, struct brw_reg dst, struct brw_reg payload, struct brw_reg desc, - unsigned desc_imm) + unsigned desc_imm, + bool eot) { const struct gen_device_info *devinfo = p->devinfo; struct brw_inst *send; @@ -2450,9 +2607,10 @@ brw_send_indirect_message(struct brw_codegen *p, if (desc.file == BRW_IMMEDIATE_VALUE) { send = next_insn(p, BRW_OPCODE_SEND); + brw_set_src0(p, send, retype(payload, BRW_REGISTER_TYPE_UD)); brw_set_desc(p, send, desc.ud | desc_imm); - } else { + const struct tgl_swsb swsb = brw_get_default_swsb(p); struct brw_reg addr = retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD); brw_push_insn_state(p); @@ -2460,6 +2618,7 @@ brw_send_indirect_message(struct brw_codegen *p, brw_set_default_mask_control(p, BRW_MASK_DISABLE); brw_set_default_exec_size(p, BRW_EXECUTE_1); brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); + brw_set_default_swsb(p, tgl_swsb_src_dep(swsb)); /* Load the indirect descriptor to an address register using OR so the * caller can specify additional descriptor bits with the desc_imm @@ -2469,13 +2628,135 @@ brw_send_indirect_message(struct brw_codegen *p, brw_pop_insn_state(p); + brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1)); send = next_insn(p, BRW_OPCODE_SEND); - brw_set_src1(p, send, addr); + brw_set_src0(p, send, retype(payload, BRW_REGISTER_TYPE_UD)); + + if (devinfo->gen >= 12) + brw_inst_set_send_sel_reg32_desc(devinfo, send, true); + else + brw_set_src1(p, send, addr); } brw_set_dest(p, send, dst); - brw_set_src0(p, send, retype(payload, BRW_REGISTER_TYPE_UD)); brw_inst_set_sfid(devinfo, send, sfid); + brw_inst_set_eot(devinfo, send, eot); +} + +void +brw_send_indirect_split_message(struct brw_codegen *p, + unsigned sfid, + struct brw_reg dst, + struct brw_reg payload0, + struct brw_reg payload1, + struct brw_reg desc, + unsigned desc_imm, + struct brw_reg ex_desc, + unsigned ex_desc_imm, + bool eot) +{ + const struct gen_device_info *devinfo = p->devinfo; + struct brw_inst *send; + + dst = retype(dst, BRW_REGISTER_TYPE_UW); + + assert(desc.type == BRW_REGISTER_TYPE_UD); + + if (desc.file == BRW_IMMEDIATE_VALUE) { + desc.ud |= desc_imm; + } else { + const struct tgl_swsb swsb = brw_get_default_swsb(p); + struct brw_reg addr = retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD); + + brw_push_insn_state(p); + brw_set_default_access_mode(p, BRW_ALIGN_1); + brw_set_default_mask_control(p, BRW_MASK_DISABLE); + brw_set_default_exec_size(p, BRW_EXECUTE_1); + brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); + brw_set_default_swsb(p, tgl_swsb_src_dep(swsb)); + + /* Load the indirect descriptor to an address register using OR so the + * caller can specify additional descriptor bits with the desc_imm + * immediate. + */ + brw_OR(p, addr, desc, brw_imm_ud(desc_imm)); + + brw_pop_insn_state(p); + desc = addr; + + brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1)); + } + + if (ex_desc.file == BRW_IMMEDIATE_VALUE && + (devinfo->gen >= 12 || (ex_desc.ud & INTEL_MASK(15, 12)) == 0)) { + ex_desc.ud |= ex_desc_imm; + } else { + const struct tgl_swsb swsb = brw_get_default_swsb(p); + struct brw_reg addr = retype(brw_address_reg(2), BRW_REGISTER_TYPE_UD); + + brw_push_insn_state(p); + brw_set_default_access_mode(p, BRW_ALIGN_1); + brw_set_default_mask_control(p, BRW_MASK_DISABLE); + brw_set_default_exec_size(p, BRW_EXECUTE_1); + brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); + brw_set_default_swsb(p, tgl_swsb_src_dep(swsb)); + + /* Load the indirect extended descriptor to an address register using OR + * so the caller can specify additional descriptor bits with the + * desc_imm immediate. + * + * Even though the instruction dispatcher always pulls the SFID and EOT + * fields from the instruction itself, actual external unit which + * processes the message gets the SFID and EOT from the extended + * descriptor which comes from the address register. If we don't OR + * those two bits in, the external unit may get confused and hang. + */ + unsigned imm_part = ex_desc_imm | sfid | eot << 5; + + if (ex_desc.file == BRW_IMMEDIATE_VALUE) { + /* ex_desc bits 15:12 don't exist in the instruction encoding prior + * to Gen12, so we may have fallen back to an indirect extended + * descriptor. + */ + brw_MOV(p, addr, brw_imm_ud(ex_desc.ud | imm_part)); + } else { + brw_OR(p, addr, ex_desc, brw_imm_ud(imm_part)); + } + + brw_pop_insn_state(p); + ex_desc = addr; + + brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1)); + } + + send = next_insn(p, devinfo->gen >= 12 ? BRW_OPCODE_SEND : BRW_OPCODE_SENDS); + brw_set_dest(p, send, dst); + brw_set_src0(p, send, retype(payload0, BRW_REGISTER_TYPE_UD)); + brw_set_src1(p, send, retype(payload1, BRW_REGISTER_TYPE_UD)); + + if (desc.file == BRW_IMMEDIATE_VALUE) { + brw_inst_set_send_sel_reg32_desc(devinfo, send, 0); + brw_inst_set_send_desc(devinfo, send, desc.ud); + } else { + assert(desc.file == BRW_ARCHITECTURE_REGISTER_FILE); + assert(desc.nr == BRW_ARF_ADDRESS); + assert(desc.subnr == 0); + brw_inst_set_send_sel_reg32_desc(devinfo, send, 1); + } + + if (ex_desc.file == BRW_IMMEDIATE_VALUE) { + brw_inst_set_send_sel_reg32_ex_desc(devinfo, send, 0); + brw_inst_set_sends_ex_desc(devinfo, send, ex_desc.ud); + } else { + assert(ex_desc.file == BRW_ARCHITECTURE_REGISTER_FILE); + assert(ex_desc.nr == BRW_ARF_ADDRESS); + assert((ex_desc.subnr & 0x3) == 0); + brw_inst_set_send_sel_reg32_ex_desc(devinfo, send, 1); + brw_inst_set_send_ex_desc_ia_subreg_nr(devinfo, send, ex_desc.subnr >> 2); + } + + brw_inst_set_sfid(devinfo, send, sfid); + brw_inst_set_eot(devinfo, send, eot); } static void @@ -2487,6 +2768,7 @@ brw_send_indirect_surface_message(struct brw_codegen *p, unsigned desc_imm) { if (surface.file != BRW_IMMEDIATE_VALUE) { + const struct tgl_swsb swsb = brw_get_default_swsb(p); struct brw_reg addr = retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD); brw_push_insn_state(p); @@ -2494,6 +2776,7 @@ brw_send_indirect_surface_message(struct brw_codegen *p, brw_set_default_mask_control(p, BRW_MASK_DISABLE); brw_set_default_exec_size(p, BRW_EXECUTE_1); brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); + brw_set_default_swsb(p, tgl_swsb_src_dep(swsb)); /* Mask out invalid bits from the surface index to avoid hangs e.g. when * some surface array is accessed out of bounds. @@ -2506,9 +2789,10 @@ brw_send_indirect_surface_message(struct brw_codegen *p, brw_pop_insn_state(p); surface = addr; + brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1)); } - brw_send_indirect_message(p, sfid, dst, payload, surface, desc_imm); + brw_send_indirect_message(p, sfid, dst, payload, surface, desc_imm, false); } static bool @@ -2557,6 +2841,8 @@ brw_find_next_block_end(struct brw_codegen *p, int start_offset) case BRW_OPCODE_HALT: if (depth == 0) return offset; + default: + break; } } @@ -2662,6 +2948,9 @@ brw_set_uip_jip(struct brw_codegen *p, int start_offset) assert(brw_inst_uip(devinfo, insn) != 0); assert(brw_inst_jip(devinfo, insn) != 0); break; + + default: + break; } } } @@ -2835,100 +3124,12 @@ brw_untyped_surface_write(struct brw_codegen *p, payload, surface, desc); } -void -brw_typed_atomic(struct brw_codegen *p, - struct brw_reg dst, - struct brw_reg payload, - struct brw_reg surface, - unsigned atomic_op, - unsigned msg_length, - bool response_expected, - bool header_present) { - const struct gen_device_info *devinfo = p->devinfo; - const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ? - HSW_SFID_DATAPORT_DATA_CACHE_1 : - GEN6_SFID_DATAPORT_RENDER_CACHE); - const bool align1 = brw_get_default_access_mode(p) == BRW_ALIGN_1; - /* SIMD4x2 typed atomic instructions only exist on HSW+ */ - const bool has_simd4x2 = devinfo->gen >= 8 || devinfo->is_haswell; - const unsigned exec_size = align1 ? 1 << brw_get_default_exec_size(p) : - has_simd4x2 ? 0 : 8; - /* Typed atomics don't support SIMD16 */ - assert(exec_size <= 8); - const unsigned response_length = - brw_surface_payload_size(p, response_expected, exec_size); - const unsigned desc = - brw_message_desc(devinfo, msg_length, response_length, header_present) | - brw_dp_typed_atomic_desc(devinfo, exec_size, brw_get_default_group(p), - atomic_op, response_expected); - /* Mask out unused components -- See comment in brw_untyped_atomic(). */ - const unsigned mask = align1 ? WRITEMASK_XYZW : WRITEMASK_X; - - brw_send_indirect_surface_message(p, sfid, brw_writemask(dst, mask), - payload, surface, desc); -} - -void -brw_typed_surface_read(struct brw_codegen *p, - struct brw_reg dst, - struct brw_reg payload, - struct brw_reg surface, - unsigned msg_length, - unsigned num_channels, - bool header_present) -{ - const struct gen_device_info *devinfo = p->devinfo; - const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ? - HSW_SFID_DATAPORT_DATA_CACHE_1 : - GEN6_SFID_DATAPORT_RENDER_CACHE); - const bool align1 = brw_get_default_access_mode(p) == BRW_ALIGN_1; - /* SIMD4x2 typed read instructions only exist on HSW+ */ - const bool has_simd4x2 = devinfo->gen >= 8 || devinfo->is_haswell; - const unsigned exec_size = align1 ? 1 << brw_get_default_exec_size(p) : - has_simd4x2 ? 0 : 8; - const unsigned response_length = - brw_surface_payload_size(p, num_channels, exec_size); - const unsigned desc = - brw_message_desc(devinfo, msg_length, response_length, header_present) | - brw_dp_typed_surface_rw_desc(devinfo, exec_size, brw_get_default_group(p), - num_channels, false); - - brw_send_indirect_surface_message(p, sfid, dst, payload, surface, desc); -} - -void -brw_typed_surface_write(struct brw_codegen *p, - struct brw_reg payload, - struct brw_reg surface, - unsigned msg_length, - unsigned num_channels, - bool header_present) -{ - const struct gen_device_info *devinfo = p->devinfo; - const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ? - HSW_SFID_DATAPORT_DATA_CACHE_1 : - GEN6_SFID_DATAPORT_RENDER_CACHE); - const bool align1 = brw_get_default_access_mode(p) == BRW_ALIGN_1; - /* SIMD4x2 typed read instructions only exist on HSW+ */ - const bool has_simd4x2 = devinfo->gen >= 8 || devinfo->is_haswell; - const unsigned exec_size = align1 ? 1 << brw_get_default_exec_size(p) : - has_simd4x2 ? 0 : 8; - const unsigned desc = - brw_message_desc(devinfo, msg_length, 0, header_present) | - brw_dp_typed_surface_rw_desc(devinfo, exec_size, brw_get_default_group(p), - num_channels, true); - /* Mask out unused components -- See comment in brw_untyped_atomic(). */ - const unsigned mask = !has_simd4x2 && !align1 ? WRITEMASK_X : WRITEMASK_XYZW; - - brw_send_indirect_surface_message(p, sfid, brw_writemask(brw_null_reg(), mask), - payload, surface, desc); -} - static void brw_set_memory_fence_message(struct brw_codegen *p, struct brw_inst *insn, enum brw_message_target sfid, - bool commit_enable) + bool commit_enable, + unsigned bti) { const struct gen_device_info *devinfo = p->devinfo; @@ -2950,54 +3151,34 @@ brw_set_memory_fence_message(struct brw_codegen *p, if (commit_enable) brw_inst_set_dp_msg_control(devinfo, insn, 1 << 5); + + assert(devinfo->gen >= 11 || bti == 0); + brw_inst_set_binding_table_index(devinfo, insn, bti); } void brw_memory_fence(struct brw_codegen *p, struct brw_reg dst, - enum opcode send_op) + struct brw_reg src, + enum opcode send_op, + enum brw_message_target sfid, + bool commit_enable, + unsigned bti) { const struct gen_device_info *devinfo = p->devinfo; - const bool commit_enable = - devinfo->gen >= 10 || /* HSD ES # 1404612949 */ - (devinfo->gen == 7 && !devinfo->is_haswell); - struct brw_inst *insn; - brw_push_insn_state(p); - brw_set_default_mask_control(p, BRW_MASK_DISABLE); - brw_set_default_exec_size(p, BRW_EXECUTE_1); - dst = vec1(dst); + dst = retype(vec1(dst), BRW_REGISTER_TYPE_UW); + src = retype(vec1(src), BRW_REGISTER_TYPE_UD); /* Set dst as destination for dependency tracking, the MEMORY_FENCE * message doesn't write anything back. */ - insn = next_insn(p, send_op); - dst = retype(dst, BRW_REGISTER_TYPE_UW); + struct brw_inst *insn = next_insn(p, send_op); + brw_inst_set_mask_control(devinfo, insn, BRW_MASK_DISABLE); + brw_inst_set_exec_size(devinfo, insn, BRW_EXECUTE_1); brw_set_dest(p, insn, dst); - brw_set_src0(p, insn, dst); - brw_set_memory_fence_message(p, insn, GEN7_SFID_DATAPORT_DATA_CACHE, - commit_enable); - - if (devinfo->gen == 7 && !devinfo->is_haswell) { - /* IVB does typed surface access through the render cache, so we need to - * flush it too. Use a different register so both flushes can be - * pipelined by the hardware. - */ - insn = next_insn(p, send_op); - brw_set_dest(p, insn, offset(dst, 1)); - brw_set_src0(p, insn, offset(dst, 1)); - brw_set_memory_fence_message(p, insn, GEN6_SFID_DATAPORT_RENDER_CACHE, - commit_enable); - - /* Now write the response of the second message into the response of the - * first to trigger a pipeline stall -- This way future render and data - * cache messages will be properly ordered with respect to past data and - * render cache messages. - */ - brw_MOV(p, dst, offset(dst, 1)); - } - - brw_pop_insn_state(p); + brw_set_src0(p, insn, src); + brw_set_memory_fence_message(p, insn, sfid, commit_enable, bti); } void @@ -3027,7 +3208,8 @@ brw_pixel_interpolator_query(struct brw_codegen *p, dest, mrf, vec1(data), - desc); + desc, + false); } void @@ -3075,6 +3257,7 @@ brw_find_live_channel(struct brw_codegen *p, struct brw_reg dst, * hardware. */ brw_SHR(p, vec1(dst), mask, brw_imm_ud(qtr_control * 8)); + brw_set_default_swsb(p, tgl_swsb_regdist(1)); brw_AND(p, vec1(dst), exec_mask, vec1(dst)); exec_mask = vec1(dst); } @@ -3210,7 +3393,7 @@ brw_broadcast(struct brw_codegen *p, /* Take into account the component size and horizontal stride. */ assert(src.vstride == src.hstride + src.width); brw_SHL(p, addr, vec1(idx), - brw_imm_ud(_mesa_logbase2(type_sz(src.type)) + + brw_imm_ud(util_logbase2(type_sz(src.type)) + src.hstride - 1)); /* We can only address up to limit bytes using the indirect @@ -3218,12 +3401,15 @@ brw_broadcast(struct brw_codegen *p, * register is above this limit. */ if (offset >= limit) { + brw_set_default_swsb(p, tgl_swsb_regdist(1)); brw_ADD(p, addr, addr, brw_imm_ud(offset - offset % limit)); offset = offset % limit; } brw_pop_insn_state(p); + brw_set_default_swsb(p, tgl_swsb_regdist(1)); + /* Use indirect addressing to fetch the specified component. */ if (type_sz(src.type) > 4 && (devinfo->is_cherryview || gen_device_info_is_9lp(devinfo))) { @@ -3242,6 +3428,7 @@ brw_broadcast(struct brw_codegen *p, brw_MOV(p, subscript(dst, BRW_REGISTER_TYPE_D, 0), retype(brw_vec1_indirect(addr.subnr, offset), BRW_REGISTER_TYPE_D)); + brw_set_default_swsb(p, tgl_swsb_null()); brw_MOV(p, subscript(dst, BRW_REGISTER_TYPE_D, 1), retype(brw_vec1_indirect(addr.subnr, offset + 4), BRW_REGISTER_TYPE_D)); @@ -3342,7 +3529,6 @@ brw_barrier(struct brw_codegen *p, struct brw_reg src) brw_set_desc(p, inst, brw_message_desc(devinfo, 1, 0, false)); brw_inst_set_sfid(devinfo, inst, BRW_SFID_MESSAGE_GATEWAY); - brw_inst_set_gateway_notify(devinfo, inst, 1); brw_inst_set_gateway_subfuncid(devinfo, inst, BRW_MESSAGE_GATEWAY_SFID_BARRIER_MSG); @@ -3371,37 +3557,35 @@ brw_WAIT(struct brw_codegen *p) brw_inst_set_mask_control(devinfo, insn, BRW_MASK_DISABLE); } -/** - * Changes the floating point rounding mode updating the control register - * field defined at cr0.0[5-6] bits. This function supports the changes to - * RTNE (00), RU (01), RD (10) and RTZ (11) rounding using bitwise operations. - * Only RTNE and RTZ rounding are enabled at nir. - */ void -brw_rounding_mode(struct brw_codegen *p, - enum brw_rnd_mode mode) -{ - const unsigned bits = mode << BRW_CR0_RND_MODE_SHIFT; - - if (bits != BRW_CR0_RND_MODE_MASK) { - brw_inst *inst = brw_AND(p, brw_cr0_reg(0), brw_cr0_reg(0), - brw_imm_ud(~BRW_CR0_RND_MODE_MASK)); - brw_inst_set_exec_size(p->devinfo, inst, BRW_EXECUTE_1); - - /* From the Skylake PRM, Volume 7, page 760: - * "Implementation Restriction on Register Access: When the control - * register is used as an explicit source and/or destination, hardware - * does not ensure execution pipeline coherency. Software must set the - * thread control field to ‘switch’ for an instruction that uses - * control register as an explicit operand." - */ - brw_inst_set_thread_control(p->devinfo, inst, BRW_THREAD_SWITCH); - } +brw_float_controls_mode(struct brw_codegen *p, + unsigned mode, unsigned mask) +{ + /* From the Skylake PRM, Volume 7, page 760: + * "Implementation Restriction on Register Access: When the control + * register is used as an explicit source and/or destination, hardware + * does not ensure execution pipeline coherency. Software must set the + * thread control field to ‘switch’ for an instruction that uses + * control register as an explicit operand." + * + * On Gen12+ this is implemented in terms of SWSB annotations instead. + */ + brw_set_default_swsb(p, tgl_swsb_regdist(1)); - if (bits) { - brw_inst *inst = brw_OR(p, brw_cr0_reg(0), brw_cr0_reg(0), - brw_imm_ud(bits)); - brw_inst_set_exec_size(p->devinfo, inst, BRW_EXECUTE_1); + brw_inst *inst = brw_AND(p, brw_cr0_reg(0), brw_cr0_reg(0), + brw_imm_ud(~mask)); + brw_inst_set_exec_size(p->devinfo, inst, BRW_EXECUTE_1); + if (p->devinfo->gen < 12) brw_inst_set_thread_control(p->devinfo, inst, BRW_THREAD_SWITCH); + + if (mode) { + brw_inst *inst_or = brw_OR(p, brw_cr0_reg(0), brw_cr0_reg(0), + brw_imm_ud(mode)); + brw_inst_set_exec_size(p->devinfo, inst_or, BRW_EXECUTE_1); + if (p->devinfo->gen < 12) + brw_inst_set_thread_control(p->devinfo, inst_or, BRW_THREAD_SWITCH); } + + if (p->devinfo->gen >= 12) + brw_SYNC(p, TGL_SYNC_NOP); }