X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fintel%2Fcompiler%2Fbrw_eu_emit.c;h=0d5c755f9e8d36e9ee1b7db70af4b2b63fc9aed6;hb=72dc06e07e3f8b9ed5bb46e3927b8f87dd24e42b;hp=18378b847a919ef990b3a4f2aac116839c6af620;hpb=1c90ae5accc284cf6b27bcc25e5e3fc3cfb80407;p=mesa.git diff --git a/src/intel/compiler/brw_eu_emit.c b/src/intel/compiler/brw_eu_emit.c index 18378b847a9..0d5c755f9e8 100644 --- a/src/intel/compiler/brw_eu_emit.c +++ b/src/intel/compiler/brw_eu_emit.c @@ -55,6 +55,7 @@ gen6_resolve_implied_move(struct brw_codegen *p, return; if (src->file != BRW_ARCHITECTURE_REGISTER_FILE || src->nr != BRW_ARF_NULL) { + assert(devinfo->gen < 12); brw_push_insn_state(p); brw_set_default_exec_size(p, BRW_EXECUTE_8); brw_set_default_mask_control(p, BRW_MASK_DISABLE); @@ -91,51 +92,92 @@ brw_set_dest(struct brw_codegen *p, brw_inst *inst, struct brw_reg dest) if (dest.file == BRW_MESSAGE_REGISTER_FILE) assert((dest.nr & ~BRW_MRF_COMPR4) < BRW_MAX_MRF(devinfo->gen)); - else if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE) + else if (dest.file == BRW_GENERAL_REGISTER_FILE) assert(dest.nr < 128); - gen7_convert_mrf_to_grf(p, &dest); + /* The hardware has a restriction where a destination of size Byte with + * a stride of 1 is only allowed for a packed byte MOV. For any other + * instruction, the stride must be at least 2, even when the destination + * is the NULL register. + */ + if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE && + dest.nr == BRW_ARF_NULL && + type_sz(dest.type) == 1 && + dest.hstride == BRW_HORIZONTAL_STRIDE_1) { + dest.hstride = BRW_HORIZONTAL_STRIDE_2; + } - brw_inst_set_dst_file_type(devinfo, inst, dest.file, dest.type); - brw_inst_set_dst_address_mode(devinfo, inst, dest.address_mode); + gen7_convert_mrf_to_grf(p, &dest); - if (dest.address_mode == BRW_ADDRESS_DIRECT) { + if (devinfo->gen >= 12 && + (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND || + brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDC)) { + assert(dest.file == BRW_GENERAL_REGISTER_FILE || + dest.file == BRW_ARCHITECTURE_REGISTER_FILE); + assert(dest.address_mode == BRW_ADDRESS_DIRECT); + assert(dest.subnr == 0); + assert(brw_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1 || + (dest.hstride == BRW_HORIZONTAL_STRIDE_1 && + dest.vstride == dest.width + 1)); + assert(!dest.negate && !dest.abs); + brw_inst_set_dst_reg_file(devinfo, inst, dest.file); brw_inst_set_dst_da_reg_nr(devinfo, inst, dest.nr); - if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { - brw_inst_set_dst_da1_subreg_nr(devinfo, inst, dest.subnr); - if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) - dest.hstride = BRW_HORIZONTAL_STRIDE_1; - brw_inst_set_dst_hstride(devinfo, inst, dest.hstride); - } else { - brw_inst_set_dst_da16_subreg_nr(devinfo, inst, dest.subnr / 16); - brw_inst_set_da16_writemask(devinfo, inst, dest.writemask); - if (dest.file == BRW_GENERAL_REGISTER_FILE || - dest.file == BRW_MESSAGE_REGISTER_FILE) { - assert(dest.writemask != 0); - } - /* From the Ivybridge PRM, Vol 4, Part 3, Section 5.2.4.1: - * Although Dst.HorzStride is a don't care for Align16, HW needs - * this to be programmed as "01". - */ - brw_inst_set_dst_hstride(devinfo, inst, 1); - } + } else if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDS || + brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDSC) { + assert(devinfo->gen < 12); + assert(dest.file == BRW_GENERAL_REGISTER_FILE || + dest.file == BRW_ARCHITECTURE_REGISTER_FILE); + assert(dest.address_mode == BRW_ADDRESS_DIRECT); + assert(dest.subnr % 16 == 0); + assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1 && + dest.vstride == dest.width + 1); + assert(!dest.negate && !dest.abs); + brw_inst_set_dst_da_reg_nr(devinfo, inst, dest.nr); + brw_inst_set_dst_da16_subreg_nr(devinfo, inst, dest.subnr / 16); + brw_inst_set_send_dst_reg_file(devinfo, inst, dest.file); } else { - brw_inst_set_dst_ia_subreg_nr(devinfo, inst, dest.subnr); + brw_inst_set_dst_file_type(devinfo, inst, dest.file, dest.type); + brw_inst_set_dst_address_mode(devinfo, inst, dest.address_mode); - /* These are different sizes in align1 vs align16: - */ - if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { - brw_inst_set_dst_ia1_addr_imm(devinfo, inst, - dest.indirect_offset); - if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) - dest.hstride = BRW_HORIZONTAL_STRIDE_1; - brw_inst_set_dst_hstride(devinfo, inst, dest.hstride); + if (dest.address_mode == BRW_ADDRESS_DIRECT) { + brw_inst_set_dst_da_reg_nr(devinfo, inst, dest.nr); + + if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { + brw_inst_set_dst_da1_subreg_nr(devinfo, inst, dest.subnr); + if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) + dest.hstride = BRW_HORIZONTAL_STRIDE_1; + brw_inst_set_dst_hstride(devinfo, inst, dest.hstride); + } else { + brw_inst_set_dst_da16_subreg_nr(devinfo, inst, dest.subnr / 16); + brw_inst_set_da16_writemask(devinfo, inst, dest.writemask); + if (dest.file == BRW_GENERAL_REGISTER_FILE || + dest.file == BRW_MESSAGE_REGISTER_FILE) { + assert(dest.writemask != 0); + } + /* From the Ivybridge PRM, Vol 4, Part 3, Section 5.2.4.1: + * Although Dst.HorzStride is a don't care for Align16, HW needs + * this to be programmed as "01". + */ + brw_inst_set_dst_hstride(devinfo, inst, 1); + } } else { - brw_inst_set_dst_ia16_addr_imm(devinfo, inst, - dest.indirect_offset); - /* even ignored in da16, still need to set as '01' */ - brw_inst_set_dst_hstride(devinfo, inst, 1); + brw_inst_set_dst_ia_subreg_nr(devinfo, inst, dest.subnr); + + /* These are different sizes in align1 vs align16: + */ + if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { + brw_inst_set_dst_ia1_addr_imm(devinfo, inst, + dest.indirect_offset); + if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) + dest.hstride = BRW_HORIZONTAL_STRIDE_1; + brw_inst_set_dst_hstride(devinfo, inst, dest.hstride); + } else { + brw_inst_set_dst_ia16_addr_imm(devinfo, inst, + dest.indirect_offset); + /* even ignored in da16, still need to set as '01' */ + brw_inst_set_dst_hstride(devinfo, inst, 1); + } } } @@ -170,13 +212,16 @@ brw_set_src0(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg) if (reg.file == BRW_MESSAGE_REGISTER_FILE) assert((reg.nr & ~BRW_MRF_COMPR4) < BRW_MAX_MRF(devinfo->gen)); - else if (reg.file != BRW_ARCHITECTURE_REGISTER_FILE) + else if (reg.file == BRW_GENERAL_REGISTER_FILE) assert(reg.nr < 128); gen7_convert_mrf_to_grf(p, ®); - if (devinfo->gen >= 6 && (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND || - brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDC)) { + if (devinfo->gen >= 6 && + (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND || + brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDC || + brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDS || + brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDSC)) { /* Any source modifiers or regions will be ignored, since this just * identifies the MRF/GRF to start reading the message contents from. * Check for some likely failures. @@ -186,84 +231,110 @@ brw_set_src0(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg) assert(reg.address_mode == BRW_ADDRESS_DIRECT); } - brw_inst_set_src0_file_type(devinfo, inst, reg.file, reg.type); - brw_inst_set_src0_abs(devinfo, inst, reg.abs); - brw_inst_set_src0_negate(devinfo, inst, reg.negate); - brw_inst_set_src0_address_mode(devinfo, inst, reg.address_mode); - - if (reg.file == BRW_IMMEDIATE_VALUE) { - if (reg.type == BRW_REGISTER_TYPE_DF || - brw_inst_opcode(devinfo, inst) == BRW_OPCODE_DIM) - brw_inst_set_imm_df(devinfo, inst, reg.df); - else if (reg.type == BRW_REGISTER_TYPE_UQ || - reg.type == BRW_REGISTER_TYPE_Q) - brw_inst_set_imm_uq(devinfo, inst, reg.u64); - else - brw_inst_set_imm_ud(devinfo, inst, reg.ud); - - if (type_sz(reg.type) < 8) { - brw_inst_set_src1_reg_file(devinfo, inst, - BRW_ARCHITECTURE_REGISTER_FILE); - brw_inst_set_src1_reg_hw_type(devinfo, inst, - brw_inst_src0_reg_hw_type(devinfo, inst)); - } + if (devinfo->gen >= 12 && + (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND || + brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDC)) { + assert(reg.file != BRW_IMMEDIATE_VALUE); + assert(reg.address_mode == BRW_ADDRESS_DIRECT); + assert(reg.subnr == 0); + assert(has_scalar_region(reg) || + (reg.hstride == BRW_HORIZONTAL_STRIDE_1 && + reg.vstride == reg.width + 1)); + assert(!reg.negate && !reg.abs); + brw_inst_set_send_src0_reg_file(devinfo, inst, reg.file); + brw_inst_set_src0_da_reg_nr(devinfo, inst, reg.nr); + + } else if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDS || + brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDSC) { + assert(reg.file == BRW_GENERAL_REGISTER_FILE); + assert(reg.address_mode == BRW_ADDRESS_DIRECT); + assert(reg.subnr % 16 == 0); + assert(has_scalar_region(reg) || + (reg.hstride == BRW_HORIZONTAL_STRIDE_1 && + reg.vstride == reg.width + 1)); + assert(!reg.negate && !reg.abs); + brw_inst_set_src0_da_reg_nr(devinfo, inst, reg.nr); + brw_inst_set_src0_da16_subreg_nr(devinfo, inst, reg.subnr / 16); } else { - if (reg.address_mode == BRW_ADDRESS_DIRECT) { - brw_inst_set_src0_da_reg_nr(devinfo, inst, reg.nr); - if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { - brw_inst_set_src0_da1_subreg_nr(devinfo, inst, reg.subnr); - } else { - brw_inst_set_src0_da16_subreg_nr(devinfo, inst, reg.subnr / 16); - } + brw_inst_set_src0_file_type(devinfo, inst, reg.file, reg.type); + brw_inst_set_src0_abs(devinfo, inst, reg.abs); + brw_inst_set_src0_negate(devinfo, inst, reg.negate); + brw_inst_set_src0_address_mode(devinfo, inst, reg.address_mode); + + if (reg.file == BRW_IMMEDIATE_VALUE) { + if (reg.type == BRW_REGISTER_TYPE_DF || + brw_inst_opcode(devinfo, inst) == BRW_OPCODE_DIM) + brw_inst_set_imm_df(devinfo, inst, reg.df); + else if (reg.type == BRW_REGISTER_TYPE_UQ || + reg.type == BRW_REGISTER_TYPE_Q) + brw_inst_set_imm_uq(devinfo, inst, reg.u64); + else + brw_inst_set_imm_ud(devinfo, inst, reg.ud); + + if (devinfo->gen < 12 && type_sz(reg.type) < 8) { + brw_inst_set_src1_reg_file(devinfo, inst, + BRW_ARCHITECTURE_REGISTER_FILE); + brw_inst_set_src1_reg_hw_type(devinfo, inst, + brw_inst_src0_reg_hw_type(devinfo, inst)); + } } else { - brw_inst_set_src0_ia_subreg_nr(devinfo, inst, reg.subnr); + if (reg.address_mode == BRW_ADDRESS_DIRECT) { + brw_inst_set_src0_da_reg_nr(devinfo, inst, reg.nr); + if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { + brw_inst_set_src0_da1_subreg_nr(devinfo, inst, reg.subnr); + } else { + brw_inst_set_src0_da16_subreg_nr(devinfo, inst, reg.subnr / 16); + } + } else { + brw_inst_set_src0_ia_subreg_nr(devinfo, inst, reg.subnr); - if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { - brw_inst_set_src0_ia1_addr_imm(devinfo, inst, reg.indirect_offset); - } else { - brw_inst_set_src0_ia16_addr_imm(devinfo, inst, reg.indirect_offset); - } - } + if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { + brw_inst_set_src0_ia1_addr_imm(devinfo, inst, reg.indirect_offset); + } else { + brw_inst_set_src0_ia16_addr_imm(devinfo, inst, reg.indirect_offset); + } + } - if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { - if (reg.width == BRW_WIDTH_1 && - brw_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1) { - brw_inst_set_src0_hstride(devinfo, inst, BRW_HORIZONTAL_STRIDE_0); - brw_inst_set_src0_width(devinfo, inst, BRW_WIDTH_1); - brw_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_0); - } else { - brw_inst_set_src0_hstride(devinfo, inst, reg.hstride); - brw_inst_set_src0_width(devinfo, inst, reg.width); - brw_inst_set_src0_vstride(devinfo, inst, reg.vstride); - } - } else { - brw_inst_set_src0_da16_swiz_x(devinfo, inst, - BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_X)); - brw_inst_set_src0_da16_swiz_y(devinfo, inst, - BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Y)); - brw_inst_set_src0_da16_swiz_z(devinfo, inst, - BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Z)); - brw_inst_set_src0_da16_swiz_w(devinfo, inst, - BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_W)); - - if (reg.vstride == BRW_VERTICAL_STRIDE_8) { - /* This is an oddity of the fact we're using the same - * descriptions for registers in align_16 as align_1: - */ - brw_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4); - } else if (devinfo->gen == 7 && !devinfo->is_haswell && - reg.type == BRW_REGISTER_TYPE_DF && - reg.vstride == BRW_VERTICAL_STRIDE_2) { - /* From SNB PRM: - * - * "For Align16 access mode, only encodings of 0000 and 0011 - * are allowed. Other codes are reserved." - * - * Presumably the DevSNB behavior applies to IVB as well. - */ - brw_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4); + if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { + if (reg.width == BRW_WIDTH_1 && + brw_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1) { + brw_inst_set_src0_hstride(devinfo, inst, BRW_HORIZONTAL_STRIDE_0); + brw_inst_set_src0_width(devinfo, inst, BRW_WIDTH_1); + brw_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_0); + } else { + brw_inst_set_src0_hstride(devinfo, inst, reg.hstride); + brw_inst_set_src0_width(devinfo, inst, reg.width); + brw_inst_set_src0_vstride(devinfo, inst, reg.vstride); + } } else { - brw_inst_set_src0_vstride(devinfo, inst, reg.vstride); + brw_inst_set_src0_da16_swiz_x(devinfo, inst, + BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_X)); + brw_inst_set_src0_da16_swiz_y(devinfo, inst, + BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Y)); + brw_inst_set_src0_da16_swiz_z(devinfo, inst, + BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Z)); + brw_inst_set_src0_da16_swiz_w(devinfo, inst, + BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_W)); + + if (reg.vstride == BRW_VERTICAL_STRIDE_8) { + /* This is an oddity of the fact we're using the same + * descriptions for registers in align_16 as align_1: + */ + brw_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4); + } else if (devinfo->gen == 7 && !devinfo->is_haswell && + reg.type == BRW_REGISTER_TYPE_DF && + reg.vstride == BRW_VERTICAL_STRIDE_2) { + /* From SNB PRM: + * + * "For Align16 access mode, only encodings of 0000 and 0011 + * are allowed. Other codes are reserved." + * + * Presumably the DevSNB behavior applies to IVB as well. + */ + brw_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4); + } else { + brw_inst_set_src0_vstride(devinfo, inst, reg.vstride); + } } } } @@ -275,85 +346,102 @@ brw_set_src1(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg) { const struct gen_device_info *devinfo = p->devinfo; - if (reg.file != BRW_ARCHITECTURE_REGISTER_FILE) + if (reg.file == BRW_GENERAL_REGISTER_FILE) assert(reg.nr < 128); - /* From the IVB PRM Vol. 4, Pt. 3, Section 3.3.3.5: - * - * "Accumulator registers may be accessed explicitly as src0 - * operands only." - */ - assert(reg.file != BRW_ARCHITECTURE_REGISTER_FILE || - reg.nr != BRW_ARF_ACCUMULATOR); - - gen7_convert_mrf_to_grf(p, ®); - assert(reg.file != BRW_MESSAGE_REGISTER_FILE); + if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDS || + brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDSC || + (devinfo->gen >= 12 && + (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND || + brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDC))) { + assert(reg.file == BRW_GENERAL_REGISTER_FILE || + reg.file == BRW_ARCHITECTURE_REGISTER_FILE); + assert(reg.address_mode == BRW_ADDRESS_DIRECT); + assert(reg.subnr == 0); + assert(has_scalar_region(reg) || + (reg.hstride == BRW_HORIZONTAL_STRIDE_1 && + reg.vstride == reg.width + 1)); + assert(!reg.negate && !reg.abs); + brw_inst_set_send_src1_reg_nr(devinfo, inst, reg.nr); + brw_inst_set_send_src1_reg_file(devinfo, inst, reg.file); + } else { + /* From the IVB PRM Vol. 4, Pt. 3, Section 3.3.3.5: + * + * "Accumulator registers may be accessed explicitly as src0 + * operands only." + */ + assert(reg.file != BRW_ARCHITECTURE_REGISTER_FILE || + reg.nr != BRW_ARF_ACCUMULATOR); - brw_inst_set_src1_file_type(devinfo, inst, reg.file, reg.type); - brw_inst_set_src1_abs(devinfo, inst, reg.abs); - brw_inst_set_src1_negate(devinfo, inst, reg.negate); + gen7_convert_mrf_to_grf(p, ®); + assert(reg.file != BRW_MESSAGE_REGISTER_FILE); - /* Only src1 can be immediate in two-argument instructions. - */ - assert(brw_inst_src0_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE); + brw_inst_set_src1_file_type(devinfo, inst, reg.file, reg.type); + brw_inst_set_src1_abs(devinfo, inst, reg.abs); + brw_inst_set_src1_negate(devinfo, inst, reg.negate); - if (reg.file == BRW_IMMEDIATE_VALUE) { - /* two-argument instructions can only use 32-bit immediates */ - assert(type_sz(reg.type) < 8); - brw_inst_set_imm_ud(devinfo, inst, reg.ud); - } else { - /* This is a hardware restriction, which may or may not be lifted - * in the future: + /* Only src1 can be immediate in two-argument instructions. */ - assert (reg.address_mode == BRW_ADDRESS_DIRECT); - /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */ + assert(brw_inst_src0_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE); - brw_inst_set_src1_da_reg_nr(devinfo, inst, reg.nr); - if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { - brw_inst_set_src1_da1_subreg_nr(devinfo, inst, reg.subnr); + if (reg.file == BRW_IMMEDIATE_VALUE) { + /* two-argument instructions can only use 32-bit immediates */ + assert(type_sz(reg.type) < 8); + brw_inst_set_imm_ud(devinfo, inst, reg.ud); } else { - brw_inst_set_src1_da16_subreg_nr(devinfo, inst, reg.subnr / 16); - } + /* This is a hardware restriction, which may or may not be lifted + * in the future: + */ + assert (reg.address_mode == BRW_ADDRESS_DIRECT); + /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */ - if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { - if (reg.width == BRW_WIDTH_1 && - brw_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1) { - brw_inst_set_src1_hstride(devinfo, inst, BRW_HORIZONTAL_STRIDE_0); - brw_inst_set_src1_width(devinfo, inst, BRW_WIDTH_1); - brw_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_0); - } else { - brw_inst_set_src1_hstride(devinfo, inst, reg.hstride); - brw_inst_set_src1_width(devinfo, inst, reg.width); - brw_inst_set_src1_vstride(devinfo, inst, reg.vstride); - } - } else { - brw_inst_set_src1_da16_swiz_x(devinfo, inst, - BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_X)); - brw_inst_set_src1_da16_swiz_y(devinfo, inst, - BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Y)); - brw_inst_set_src1_da16_swiz_z(devinfo, inst, - BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Z)); - brw_inst_set_src1_da16_swiz_w(devinfo, inst, - BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_W)); - - if (reg.vstride == BRW_VERTICAL_STRIDE_8) { - /* This is an oddity of the fact we're using the same - * descriptions for registers in align_16 as align_1: - */ - brw_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4); - } else if (devinfo->gen == 7 && !devinfo->is_haswell && - reg.type == BRW_REGISTER_TYPE_DF && - reg.vstride == BRW_VERTICAL_STRIDE_2) { - /* From SNB PRM: - * - * "For Align16 access mode, only encodings of 0000 and 0011 - * are allowed. Other codes are reserved." - * - * Presumably the DevSNB behavior applies to IVB as well. - */ - brw_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4); + brw_inst_set_src1_da_reg_nr(devinfo, inst, reg.nr); + if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { + brw_inst_set_src1_da1_subreg_nr(devinfo, inst, reg.subnr); } else { - brw_inst_set_src1_vstride(devinfo, inst, reg.vstride); + brw_inst_set_src1_da16_subreg_nr(devinfo, inst, reg.subnr / 16); + } + + if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { + if (reg.width == BRW_WIDTH_1 && + brw_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1) { + brw_inst_set_src1_hstride(devinfo, inst, BRW_HORIZONTAL_STRIDE_0); + brw_inst_set_src1_width(devinfo, inst, BRW_WIDTH_1); + brw_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_0); + } else { + brw_inst_set_src1_hstride(devinfo, inst, reg.hstride); + brw_inst_set_src1_width(devinfo, inst, reg.width); + brw_inst_set_src1_vstride(devinfo, inst, reg.vstride); + } + } else { + brw_inst_set_src1_da16_swiz_x(devinfo, inst, + BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_X)); + brw_inst_set_src1_da16_swiz_y(devinfo, inst, + BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Y)); + brw_inst_set_src1_da16_swiz_z(devinfo, inst, + BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Z)); + brw_inst_set_src1_da16_swiz_w(devinfo, inst, + BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_W)); + + if (reg.vstride == BRW_VERTICAL_STRIDE_8) { + /* This is an oddity of the fact we're using the same + * descriptions for registers in align_16 as align_1: + */ + brw_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4); + } else if (devinfo->gen == 7 && !devinfo->is_haswell && + reg.type == BRW_REGISTER_TYPE_DF && + reg.vstride == BRW_VERTICAL_STRIDE_2) { + /* From SNB PRM: + * + * "For Align16 access mode, only encodings of 0000 and 0011 + * are allowed. Other codes are reserved." + * + * Presumably the DevSNB behavior applies to IVB as well. + */ + brw_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4); + } else { + brw_inst_set_src1_vstride(devinfo, inst, reg.vstride); + } } } } @@ -368,11 +456,13 @@ brw_set_desc_ex(struct brw_codegen *p, brw_inst *inst, unsigned desc, unsigned ex_desc) { const struct gen_device_info *devinfo = p->devinfo; - brw_inst_set_src1_file_type(devinfo, inst, - BRW_IMMEDIATE_VALUE, BRW_REGISTER_TYPE_D); + assert(brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND || + brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDC); + if (devinfo->gen < 12) + brw_inst_set_src1_file_type(devinfo, inst, + BRW_IMMEDIATE_VALUE, BRW_REGISTER_TYPE_UD); brw_inst_set_send_desc(devinfo, inst, desc); - if (devinfo->gen >= 9 && (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND || - brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDC)) + if (devinfo->gen >= 9) brw_inst_set_send_ex_desc(devinfo, inst, ex_desc); } @@ -489,99 +579,6 @@ static void brw_set_urb_message( struct brw_codegen *p, } } -void -brw_set_dp_write_message(struct brw_codegen *p, - brw_inst *insn, - unsigned binding_table_index, - unsigned msg_control, - unsigned msg_type, - unsigned target_cache, - unsigned msg_length, - bool header_present, - unsigned last_render_target, - unsigned response_length, - unsigned end_of_thread, - unsigned send_commit_msg) -{ - const struct gen_device_info *devinfo = p->devinfo; - const unsigned sfid = (devinfo->gen >= 6 ? target_cache : - BRW_SFID_DATAPORT_WRITE); - - brw_set_desc(p, insn, brw_message_desc( - devinfo, msg_length, response_length, header_present)); - - brw_inst_set_sfid(devinfo, insn, sfid); - brw_inst_set_eot(devinfo, insn, !!end_of_thread); - brw_inst_set_binding_table_index(devinfo, insn, binding_table_index); - brw_inst_set_dp_write_msg_type(devinfo, insn, msg_type); - brw_inst_set_dp_write_msg_control(devinfo, insn, msg_control); - brw_inst_set_rt_last(devinfo, insn, last_render_target); - if (devinfo->gen < 7) { - brw_inst_set_dp_write_commit(devinfo, insn, send_commit_msg); - } - - if (devinfo->gen >= 11) - brw_inst_set_null_rt(devinfo, insn, false); -} - -void -brw_set_dp_read_message(struct brw_codegen *p, - brw_inst *insn, - unsigned binding_table_index, - unsigned msg_control, - unsigned msg_type, - unsigned target_cache, - unsigned msg_length, - bool header_present, - unsigned response_length) -{ - const struct gen_device_info *devinfo = p->devinfo; - const unsigned sfid = (devinfo->gen >= 6 ? target_cache : - BRW_SFID_DATAPORT_READ); - - brw_set_desc(p, insn, brw_message_desc( - devinfo, msg_length, response_length, header_present)); - - const unsigned opcode = brw_inst_opcode(devinfo, insn); - if (opcode == BRW_OPCODE_SEND || opcode == BRW_OPCODE_SENDC) - brw_inst_set_sfid(devinfo, insn, sfid); - brw_inst_set_binding_table_index(devinfo, insn, binding_table_index); - brw_inst_set_dp_read_msg_type(devinfo, insn, msg_type); - brw_inst_set_dp_read_msg_control(devinfo, insn, msg_control); - if (devinfo->gen < 6) - brw_inst_set_dp_read_target_cache(devinfo, insn, target_cache); -} - -void -brw_set_sampler_message(struct brw_codegen *p, - brw_inst *inst, - unsigned binding_table_index, - unsigned sampler, - unsigned msg_type, - unsigned response_length, - unsigned msg_length, - unsigned header_present, - unsigned simd_mode, - unsigned return_format) -{ - const struct gen_device_info *devinfo = p->devinfo; - - brw_set_desc(p, inst, brw_message_desc( - devinfo, msg_length, response_length, header_present)); - - const unsigned opcode = brw_inst_opcode(devinfo, inst); - if (opcode == BRW_OPCODE_SEND || opcode == BRW_OPCODE_SENDC) - brw_inst_set_sfid(devinfo, inst, BRW_SFID_SAMPLER); - brw_inst_set_binding_table_index(devinfo, inst, binding_table_index); - brw_inst_set_sampler(devinfo, inst, sampler); - brw_inst_set_sampler_msg_type(devinfo, inst, msg_type); - if (devinfo->gen >= 5) { - brw_inst_set_sampler_simd_mode(devinfo, inst, simd_mode); - } else if (devinfo->gen == 4 && !devinfo->is_g4x) { - brw_inst_set_sampler_return_format(devinfo, inst, return_format); - } -} - static void gen7_set_dp_scratch_message(struct brw_codegen *p, brw_inst *inst, @@ -597,7 +594,7 @@ gen7_set_dp_scratch_message(struct brw_codegen *p, const struct gen_device_info *devinfo = p->devinfo; assert(num_regs == 1 || num_regs == 2 || num_regs == 4 || (devinfo->gen >= 8 && num_regs == 8)); - const unsigned block_size = (devinfo->gen >= 8 ? _mesa_logbase2(num_regs) : + const unsigned block_size = (devinfo->gen >= 8 ? util_logbase2(num_regs) : num_regs - 1); brw_set_desc(p, inst, brw_message_desc( @@ -622,6 +619,8 @@ brw_inst_set_state(const struct gen_device_info *devinfo, brw_inst_set_compression(devinfo, insn, state->compressed); brw_inst_set_access_mode(devinfo, insn, state->access_mode); brw_inst_set_mask_control(devinfo, insn, state->mask_control); + if (devinfo->gen >= 12) + brw_inst_set_swsb(devinfo, insn, tgl_swsb_encode(state->swsb)); brw_inst_set_saturate(devinfo, insn, state->saturate); brw_inst_set_pred_control(devinfo, insn, state->predicate); brw_inst_set_pred_inv(devinfo, insn, state->pred_inv); @@ -701,12 +700,17 @@ get_3src_subreg_nr(struct brw_reg reg) } static enum gen10_align1_3src_vertical_stride -to_3src_align1_vstride(enum brw_vertical_stride vstride) +to_3src_align1_vstride(const struct gen_device_info *devinfo, + enum brw_vertical_stride vstride) { switch (vstride) { case BRW_VERTICAL_STRIDE_0: return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_0; + case BRW_VERTICAL_STRIDE_1: + assert(devinfo->gen >= 12); + return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_1; case BRW_VERTICAL_STRIDE_2: + assert(devinfo->gen < 12); return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_2; case BRW_VERTICAL_STRIDE_4: return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_4; @@ -746,9 +750,14 @@ brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest, gen7_convert_mrf_to_grf(p, &dest); assert(dest.nr < 128); - assert(src0.nr < 128); - assert(src1.nr < 128); - assert(src2.nr < 128); + + if (devinfo->gen >= 10) + assert(!(src0.file == BRW_IMMEDIATE_VALUE && + src2.file == BRW_IMMEDIATE_VALUE)); + + assert(src0.file == BRW_IMMEDIATE_VALUE || src0.nr < 128); + assert(src1.file != BRW_IMMEDIATE_VALUE && src1.nr < 128); + assert(src2.file == BRW_IMMEDIATE_VALUE || src2.nr < 128); assert(dest.address_mode == BRW_ADDRESS_DIRECT); assert(src0.address_mode == BRW_ADDRESS_DIRECT); assert(src1.address_mode == BRW_ADDRESS_DIRECT); @@ -758,14 +767,19 @@ brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest, assert(dest.file == BRW_GENERAL_REGISTER_FILE || dest.file == BRW_ARCHITECTURE_REGISTER_FILE); - if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE) { - brw_inst_set_3src_a1_dst_reg_file(devinfo, inst, - BRW_ALIGN1_3SRC_ACCUMULATOR); - brw_inst_set_3src_dst_reg_nr(devinfo, inst, BRW_ARF_ACCUMULATOR); - } else { - brw_inst_set_3src_a1_dst_reg_file(devinfo, inst, - BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE); + if (devinfo->gen >= 12) { + brw_inst_set_3src_a1_dst_reg_file(devinfo, inst, dest.file); brw_inst_set_3src_dst_reg_nr(devinfo, inst, dest.nr); + } else { + if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE) { + brw_inst_set_3src_a1_dst_reg_file(devinfo, inst, + BRW_ALIGN1_3SRC_ACCUMULATOR); + brw_inst_set_3src_dst_reg_nr(devinfo, inst, BRW_ARF_ACCUMULATOR); + } else { + brw_inst_set_3src_a1_dst_reg_file(devinfo, inst, + BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE); + brw_inst_set_3src_dst_reg_nr(devinfo, inst, dest.nr); + } } brw_inst_set_3src_a1_dst_subreg_nr(devinfo, inst, dest.subnr / 8); @@ -784,27 +798,26 @@ brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest, brw_inst_set_3src_a1_src1_type(devinfo, inst, src1.type); brw_inst_set_3src_a1_src2_type(devinfo, inst, src2.type); - brw_inst_set_3src_a1_src0_vstride(devinfo, inst, - to_3src_align1_vstride(src0.vstride)); - brw_inst_set_3src_a1_src1_vstride(devinfo, inst, - to_3src_align1_vstride(src1.vstride)); - /* no vstride on src2 */ - - brw_inst_set_3src_a1_src0_hstride(devinfo, inst, - to_3src_align1_hstride(src0.hstride)); - brw_inst_set_3src_a1_src1_hstride(devinfo, inst, - to_3src_align1_hstride(src1.hstride)); - brw_inst_set_3src_a1_src2_hstride(devinfo, inst, - to_3src_align1_hstride(src2.hstride)); - - brw_inst_set_3src_a1_src0_subreg_nr(devinfo, inst, src0.subnr); - if (src0.type == BRW_REGISTER_TYPE_NF) { - brw_inst_set_3src_src0_reg_nr(devinfo, inst, BRW_ARF_ACCUMULATOR); + if (src0.file == BRW_IMMEDIATE_VALUE) { + brw_inst_set_3src_a1_src0_imm(devinfo, inst, src0.ud); } else { - brw_inst_set_3src_src0_reg_nr(devinfo, inst, src0.nr); + brw_inst_set_3src_a1_src0_vstride( + devinfo, inst, to_3src_align1_vstride(devinfo, src0.vstride)); + brw_inst_set_3src_a1_src0_hstride(devinfo, inst, + to_3src_align1_hstride(src0.hstride)); + brw_inst_set_3src_a1_src0_subreg_nr(devinfo, inst, src0.subnr); + if (src0.type == BRW_REGISTER_TYPE_NF) { + brw_inst_set_3src_src0_reg_nr(devinfo, inst, BRW_ARF_ACCUMULATOR); + } else { + brw_inst_set_3src_src0_reg_nr(devinfo, inst, src0.nr); + } + brw_inst_set_3src_src0_abs(devinfo, inst, src0.abs); + brw_inst_set_3src_src0_negate(devinfo, inst, src0.negate); } - brw_inst_set_3src_src0_abs(devinfo, inst, src0.abs); - brw_inst_set_3src_src0_negate(devinfo, inst, src0.negate); + brw_inst_set_3src_a1_src1_vstride( + devinfo, inst, to_3src_align1_vstride(devinfo, src1.vstride)); + brw_inst_set_3src_a1_src1_hstride(devinfo, inst, + to_3src_align1_hstride(src1.hstride)); brw_inst_set_3src_a1_src1_subreg_nr(devinfo, inst, src1.subnr); if (src1.file == BRW_ARCHITECTURE_REGISTER_FILE) { @@ -815,10 +828,17 @@ brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest, brw_inst_set_3src_src1_abs(devinfo, inst, src1.abs); brw_inst_set_3src_src1_negate(devinfo, inst, src1.negate); - brw_inst_set_3src_a1_src2_subreg_nr(devinfo, inst, src2.subnr); - brw_inst_set_3src_src2_reg_nr(devinfo, inst, src2.nr); - brw_inst_set_3src_src2_abs(devinfo, inst, src2.abs); - brw_inst_set_3src_src2_negate(devinfo, inst, src2.negate); + if (src2.file == BRW_IMMEDIATE_VALUE) { + brw_inst_set_3src_a1_src2_imm(devinfo, inst, src2.ud); + } else { + brw_inst_set_3src_a1_src2_hstride(devinfo, inst, + to_3src_align1_hstride(src2.hstride)); + /* no vstride on src2 */ + brw_inst_set_3src_a1_src2_subreg_nr(devinfo, inst, src2.subnr); + brw_inst_set_3src_src2_reg_nr(devinfo, inst, src2.nr); + brw_inst_set_3src_src2_abs(devinfo, inst, src2.abs); + brw_inst_set_3src_src2_negate(devinfo, inst, src2.negate); + } assert(src0.file == BRW_GENERAL_REGISTER_FILE || src0.file == BRW_IMMEDIATE_VALUE || @@ -829,31 +849,49 @@ brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest, assert(src2.file == BRW_GENERAL_REGISTER_FILE || src2.file == BRW_IMMEDIATE_VALUE); - brw_inst_set_3src_a1_src0_reg_file(devinfo, inst, - src0.file == BRW_GENERAL_REGISTER_FILE ? - BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE : - BRW_ALIGN1_3SRC_IMMEDIATE_VALUE); - brw_inst_set_3src_a1_src1_reg_file(devinfo, inst, - src1.file == BRW_GENERAL_REGISTER_FILE ? - BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE : - BRW_ALIGN1_3SRC_ACCUMULATOR); - brw_inst_set_3src_a1_src2_reg_file(devinfo, inst, - src2.file == BRW_GENERAL_REGISTER_FILE ? - BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE : - BRW_ALIGN1_3SRC_IMMEDIATE_VALUE); + if (devinfo->gen >= 12) { + if (src0.file == BRW_IMMEDIATE_VALUE) { + brw_inst_set_3src_a1_src0_is_imm(devinfo, inst, 1); + } else { + brw_inst_set_3src_a1_src0_reg_file(devinfo, inst, src0.file); + } + + brw_inst_set_3src_a1_src1_reg_file(devinfo, inst, src1.file); + + if (src2.file == BRW_IMMEDIATE_VALUE) { + brw_inst_set_3src_a1_src2_is_imm(devinfo, inst, 1); + } else { + brw_inst_set_3src_a1_src2_reg_file(devinfo, inst, src2.file); + } + } else { + brw_inst_set_3src_a1_src0_reg_file(devinfo, inst, + src0.file == BRW_GENERAL_REGISTER_FILE ? + BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE : + BRW_ALIGN1_3SRC_IMMEDIATE_VALUE); + brw_inst_set_3src_a1_src1_reg_file(devinfo, inst, + src1.file == BRW_GENERAL_REGISTER_FILE ? + BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE : + BRW_ALIGN1_3SRC_ACCUMULATOR); + brw_inst_set_3src_a1_src2_reg_file(devinfo, inst, + src2.file == BRW_GENERAL_REGISTER_FILE ? + BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE : + BRW_ALIGN1_3SRC_IMMEDIATE_VALUE); + } + } else { assert(dest.file == BRW_GENERAL_REGISTER_FILE || dest.file == BRW_MESSAGE_REGISTER_FILE); assert(dest.type == BRW_REGISTER_TYPE_F || dest.type == BRW_REGISTER_TYPE_DF || dest.type == BRW_REGISTER_TYPE_D || - dest.type == BRW_REGISTER_TYPE_UD); + dest.type == BRW_REGISTER_TYPE_UD || + (dest.type == BRW_REGISTER_TYPE_HF && devinfo->gen >= 8)); if (devinfo->gen == 6) { brw_inst_set_3src_a16_dst_reg_file(devinfo, inst, dest.file == BRW_MESSAGE_REGISTER_FILE); } brw_inst_set_3src_dst_reg_nr(devinfo, inst, dest.nr); - brw_inst_set_3src_a16_dst_subreg_nr(devinfo, inst, dest.subnr / 16); + brw_inst_set_3src_a16_dst_subreg_nr(devinfo, inst, dest.subnr / 4); brw_inst_set_3src_a16_dst_writemask(devinfo, inst, dest.writemask); assert(src0.file == BRW_GENERAL_REGISTER_FILE); @@ -892,6 +930,22 @@ brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest, */ brw_inst_set_3src_a16_src_type(devinfo, inst, dest.type); brw_inst_set_3src_a16_dst_type(devinfo, inst, dest.type); + + /* From the Bspec, 3D Media GPGPU, Instruction fields, srcType: + * + * "Three source instructions can use operands with mixed-mode + * precision. When SrcType field is set to :f or :hf it defines + * precision for source 0 only, and fields Src1Type and Src2Type + * define precision for other source operands: + * + * 0b = :f. Single precision Float (32-bit). + * 1b = :hf. Half precision Float (16-bit)." + */ + if (src1.type == BRW_REGISTER_TYPE_HF) + brw_inst_set_3src_a16_src1_type(devinfo, inst, 1); + + if (src2.type == BRW_REGISTER_TYPE_HF) + brw_inst_set_3src_a16_src2_type(devinfo, inst, 1); } } @@ -925,7 +979,15 @@ brw_inst *brw_##OP(struct brw_codegen *p, \ struct brw_reg src0, \ struct brw_reg src1, \ struct brw_reg src2) \ -{ \ +{ \ + if (p->current->access_mode == BRW_ALIGN_16) { \ + if (src0.vstride == BRW_VERTICAL_STRIDE_0) \ + src0.swizzle = BRW_SWIZZLE_XXXX; \ + if (src1.vstride == BRW_VERTICAL_STRIDE_0) \ + src1.swizzle = BRW_SWIZZLE_XXXX; \ + if (src2.vstride == BRW_VERTICAL_STRIDE_0) \ + src2.swizzle = BRW_SWIZZLE_XXXX; \ + } \ return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2); \ } @@ -946,37 +1008,19 @@ brw_inst *brw_##OP(struct brw_codegen *p, \ assert(src0.type == BRW_REGISTER_TYPE_DF); \ assert(src1.type == BRW_REGISTER_TYPE_DF); \ assert(src2.type == BRW_REGISTER_TYPE_DF); \ + } \ + \ + if (p->current->access_mode == BRW_ALIGN_16) { \ + if (src0.vstride == BRW_VERTICAL_STRIDE_0) \ + src0.swizzle = BRW_SWIZZLE_XXXX; \ + if (src1.vstride == BRW_VERTICAL_STRIDE_0) \ + src1.swizzle = BRW_SWIZZLE_XXXX; \ + if (src2.vstride == BRW_VERTICAL_STRIDE_0) \ + src2.swizzle = BRW_SWIZZLE_XXXX; \ } \ return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2); \ } -/* Rounding operations (other than RNDD) require two instructions - the first - * stores a rounded value (possibly the wrong way) in the dest register, but - * also sets a per-channel "increment bit" in the flag register. A predicated - * add of 1.0 fixes dest to contain the desired result. - * - * Sandybridge and later appear to round correctly without an ADD. - */ -#define ROUND(OP) \ -void brw_##OP(struct brw_codegen *p, \ - struct brw_reg dest, \ - struct brw_reg src) \ -{ \ - const struct gen_device_info *devinfo = p->devinfo; \ - brw_inst *rnd, *add; \ - rnd = next_insn(p, BRW_OPCODE_##OP); \ - brw_set_dest(p, rnd, dest); \ - brw_set_src0(p, rnd, src); \ - \ - if (devinfo->gen < 6) { \ - /* turn on round-increments */ \ - brw_inst_set_cond_modifier(devinfo, rnd, BRW_CONDITIONAL_R); \ - add = brw_ADD(p, dest, dest, brw_imm_f(1.0f)); \ - brw_inst_set_pred_control(devinfo, add, BRW_PREDICATE_NORMAL); \ - } \ -} - - ALU2(SEL) ALU1(NOT) ALU2(AND) @@ -986,9 +1030,14 @@ ALU2(SHR) ALU2(SHL) ALU1(DIM) ALU2(ASR) +ALU2(ROL) +ALU2(ROR) ALU3(CSEL) ALU1(FRC) ALU1(RNDD) +ALU1(RNDE) +ALU1(RNDU) +ALU1(RNDZ) ALU2(MAC) ALU2(MACH) ALU1(LZD) @@ -1008,17 +1057,14 @@ ALU1(CBIT) ALU2(ADDC) ALU2(SUBB) -ROUND(RNDZ) -ROUND(RNDE) - brw_inst * brw_MOV(struct brw_codegen *p, struct brw_reg dest, struct brw_reg src0) { const struct gen_device_info *devinfo = p->devinfo; /* When converting F->DF on IVB/BYT, every odd source channel is ignored. - * To avoid the problems that causes, we use a <1,2,0> source region to read - * each element twice. + * To avoid the problems that causes, we use an source region to + * read each element twice. */ if (devinfo->gen == 7 && !devinfo->is_haswell && brw_get_default_access_mode(p) == BRW_ALIGN_1 && @@ -1027,11 +1073,8 @@ brw_MOV(struct brw_codegen *p, struct brw_reg dest, struct brw_reg src0) src0.type == BRW_REGISTER_TYPE_D || src0.type == BRW_REGISTER_TYPE_UD) && !has_scalar_region(src0)) { - assert(src0.vstride == BRW_VERTICAL_STRIDE_4 && - src0.width == BRW_WIDTH_4 && - src0.hstride == BRW_HORIZONTAL_STRIDE_1); - - src0.vstride = BRW_VERTICAL_STRIDE_1; + assert(src0.vstride == src0.width + src0.hstride); + src0.vstride = src0.hstride; src0.width = BRW_WIDTH_2; src0.hstride = BRW_HORIZONTAL_STRIDE_0; } @@ -1177,9 +1220,12 @@ brw_F32TO16(struct brw_codegen *p, struct brw_reg dst, struct brw_reg src) } if (needs_zero_fill) { - brw_inst_set_no_dd_clear(devinfo, inst, true); + if (devinfo->gen < 12) + brw_inst_set_no_dd_clear(devinfo, inst, true); + brw_set_default_swsb(p, tgl_swsb_null()); inst = brw_MOV(p, suboffset(dst, 1), brw_imm_w(0)); - brw_inst_set_no_dd_check(devinfo, inst, true); + if (devinfo->gen < 12) + brw_inst_set_no_dd_check(devinfo, inst, true); } brw_pop_insn_state(p); @@ -1225,9 +1271,11 @@ void brw_NOP(struct brw_codegen *p) brw_inst_set_opcode(p->devinfo, insn, BRW_OPCODE_NOP); } - - - +void brw_SYNC(struct brw_codegen *p, enum tgl_sync_function func) +{ + brw_inst *insn = next_insn(p, BRW_OPCODE_SYNC); + brw_inst_set_cond_modifier(p->devinfo, insn, func); +} /*********************************************************************** * Comparisons, if/else/endif @@ -1331,7 +1379,8 @@ brw_IF(struct brw_codegen *p, unsigned execute_size) brw_inst_set_uip(devinfo, insn, 0); } else { brw_set_dest(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D))); - brw_set_src0(p, insn, brw_imm_d(0)); + if (devinfo->gen < 12) + brw_set_src0(p, insn, brw_imm_d(0)); brw_inst_set_jip(devinfo, insn, 0); brw_inst_set_uip(devinfo, insn, 0); } @@ -1531,7 +1580,8 @@ brw_ELSE(struct brw_codegen *p) brw_inst_set_uip(devinfo, insn, 0); } else { brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); - brw_set_src0(p, insn, brw_imm_d(0)); + if (devinfo->gen < 12) + brw_set_src0(p, insn, brw_imm_d(0)); brw_inst_set_jip(devinfo, insn, 0); brw_inst_set_uip(devinfo, insn, 0); } @@ -1677,18 +1727,27 @@ brw_CONT(struct brw_codegen *p) } brw_inst * -gen6_HALT(struct brw_codegen *p) +brw_HALT(struct brw_codegen *p) { const struct gen_device_info *devinfo = p->devinfo; brw_inst *insn; insn = next_insn(p, BRW_OPCODE_HALT); brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); - if (devinfo->gen >= 8) { - brw_set_src0(p, insn, brw_imm_d(0x0)); - } else { + if (devinfo->gen < 6) { + /* From the Gen4 PRM: + * + * "IP register must be put (for example, by the assembler) at + * and locations. + */ + brw_set_dest(p, insn, brw_ip_reg()); + brw_set_src0(p, insn, brw_ip_reg()); + brw_set_src1(p, insn, brw_imm_d(0x0)); /* exitcode updated later. */ + } else if (devinfo->gen < 8) { brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); brw_set_src1(p, insn, brw_imm_d(0x0)); /* UIP and JIP, updated later. */ + } else if (devinfo->gen < 12) { + brw_set_src0(p, insn, brw_imm_d(0x0)); } brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE); @@ -1784,7 +1843,8 @@ brw_WHILE(struct brw_codegen *p) if (devinfo->gen >= 8) { brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); - brw_set_src0(p, insn, brw_imm_d(0)); + if (devinfo->gen < 12) + brw_set_src0(p, insn, brw_imm_d(0)); brw_inst_set_jip(devinfo, insn, br * (do_insn - insn)); } else if (devinfo->gen == 7) { brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); @@ -1952,8 +2012,10 @@ void gen6_math(struct brw_codegen *p, assert(src1.file == BRW_GENERAL_REGISTER_FILE || (devinfo->gen >= 8 && src1.file == BRW_IMMEDIATE_VALUE)); } else { - assert(src0.type == BRW_REGISTER_TYPE_F); - assert(src1.type == BRW_REGISTER_TYPE_F); + assert(src0.type == BRW_REGISTER_TYPE_F || + (src0.type == BRW_REGISTER_TYPE_HF && devinfo->gen >= 9)); + assert(src1.type == BRW_REGISTER_TYPE_F || + (src1.type == BRW_REGISTER_TYPE_HF && devinfo->gen >= 9)); } /* Source modifiers are ignored for extended math instructions on Gen6. */ @@ -2001,7 +2063,8 @@ void brw_oword_block_write_scratch(struct brw_codegen *p, const unsigned target_cache = (devinfo->gen >= 7 ? GEN7_SFID_DATAPORT_DATA_CACHE : devinfo->gen >= 6 ? GEN6_SFID_DATAPORT_RENDER_CACHE : - BRW_DATAPORT_READ_TARGET_RENDER_CACHE); + BRW_SFID_DATAPORT_WRITE); + const struct tgl_swsb swsb = brw_get_default_swsb(p); uint32_t msg_type; if (devinfo->gen >= 6) @@ -2021,11 +2084,13 @@ void brw_oword_block_write_scratch(struct brw_codegen *p, brw_set_default_exec_size(p, BRW_EXECUTE_8); brw_set_default_mask_control(p, BRW_MASK_DISABLE); brw_set_default_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_default_swsb(p, tgl_swsb_src_dep(swsb)); brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)); /* set message header global offset field (reg 0, element 2) */ brw_set_default_exec_size(p, BRW_EXECUTE_1); + brw_set_default_swsb(p, tgl_swsb_null()); brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, mrf.nr, @@ -2033,6 +2098,7 @@ void brw_oword_block_write_scratch(struct brw_codegen *p, brw_imm_ud(offset)); brw_pop_insn_state(p); + brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1)); } { @@ -2042,6 +2108,7 @@ void brw_oword_block_write_scratch(struct brw_codegen *p, struct brw_reg src_header = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW); + brw_inst_set_sfid(devinfo, insn, target_cache); brw_inst_set_compression(devinfo, insn, false); if (brw_inst_exec_size(devinfo, insn) >= 16) @@ -2081,18 +2148,12 @@ void brw_oword_block_write_scratch(struct brw_codegen *p, else msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE; - brw_set_dp_write_message(p, - insn, - brw_scratch_surface_idx(p), - BRW_DATAPORT_OWORD_BLOCK_DWORDS(num_regs * 8), - msg_type, - target_cache, - mlen, - true, /* header_present */ - 0, /* not a render target */ - send_commit_msg, /* response_length */ - 0, /* eot */ - send_commit_msg); + brw_set_desc(p, insn, + brw_message_desc(devinfo, mlen, send_commit_msg, true) | + brw_dp_write_desc(devinfo, brw_scratch_surface_idx(p), + BRW_DATAPORT_OWORD_BLOCK_DWORDS(num_regs * 8), + msg_type, 0, /* not a render target */ + send_commit_msg)); } } @@ -2112,6 +2173,7 @@ brw_oword_block_read_scratch(struct brw_codegen *p, unsigned offset) { const struct gen_device_info *devinfo = p->devinfo; + const struct tgl_swsb swsb = brw_get_default_swsb(p); if (devinfo->gen >= 6) offset /= 16; @@ -2134,10 +2196,11 @@ brw_oword_block_read_scratch(struct brw_codegen *p, const unsigned target_cache = (devinfo->gen >= 7 ? GEN7_SFID_DATAPORT_DATA_CACHE : devinfo->gen >= 6 ? GEN6_SFID_DATAPORT_RENDER_CACHE : - BRW_DATAPORT_READ_TARGET_RENDER_CACHE); + BRW_SFID_DATAPORT_READ); { brw_push_insn_state(p); + brw_set_default_swsb(p, tgl_swsb_src_dep(swsb)); brw_set_default_exec_size(p, BRW_EXECUTE_8); brw_set_default_compression_control(p, BRW_COMPRESSION_NONE); brw_set_default_mask_control(p, BRW_MASK_DISABLE); @@ -2146,14 +2209,17 @@ brw_oword_block_read_scratch(struct brw_codegen *p, /* set message header global offset field (reg 0, element 2) */ brw_set_default_exec_size(p, BRW_EXECUTE_1); + brw_set_default_swsb(p, tgl_swsb_null()); brw_MOV(p, get_element_ud(mrf, 2), brw_imm_ud(offset)); brw_pop_insn_state(p); + brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1)); } { brw_inst *insn = next_insn(p, BRW_OPCODE_SEND); + brw_inst_set_sfid(devinfo, insn, target_cache); assert(brw_inst_pred_control(devinfo, insn) == 0); brw_inst_set_compression(devinfo, insn, false); @@ -2165,15 +2231,12 @@ brw_oword_block_read_scratch(struct brw_codegen *p, brw_inst_set_base_mrf(devinfo, insn, mrf.nr); } - brw_set_dp_read_message(p, - insn, - brw_scratch_surface_idx(p), - BRW_DATAPORT_OWORD_BLOCK_DWORDS(num_regs * 8), - BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ - target_cache, - 1, /* msg_length */ - true, /* header_present */ - rlen); + brw_set_desc(p, insn, + brw_message_desc(devinfo, 1, rlen, true) | + brw_dp_read_desc(devinfo, brw_scratch_surface_idx(p), + BRW_DATAPORT_OWORD_BLOCK_DWORDS(num_regs * 8), + BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, + BRW_DATAPORT_READ_TARGET_RENDER_CACHE)); } } @@ -2225,8 +2288,9 @@ void brw_oword_block_read(struct brw_codegen *p, const struct gen_device_info *devinfo = p->devinfo; const unsigned target_cache = (devinfo->gen >= 6 ? GEN6_SFID_DATAPORT_CONSTANT_CACHE : - BRW_DATAPORT_READ_TARGET_DATA_CACHE); + BRW_SFID_DATAPORT_READ); const unsigned exec_size = 1 << brw_get_default_exec_size(p); + const struct tgl_swsb swsb = brw_get_default_swsb(p); /* On newer hardware, offset is in units of owords. */ if (devinfo->gen >= 6) @@ -2241,10 +2305,12 @@ void brw_oword_block_read(struct brw_codegen *p, brw_push_insn_state(p); brw_set_default_exec_size(p, BRW_EXECUTE_8); + brw_set_default_swsb(p, tgl_swsb_src_dep(swsb)); brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)); /* set message header global offset field (reg 0, element 2) */ brw_set_default_exec_size(p, BRW_EXECUTE_1); + brw_set_default_swsb(p, tgl_swsb_null()); brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, mrf.nr, @@ -2252,8 +2318,12 @@ void brw_oword_block_read(struct brw_codegen *p, brw_imm_ud(offset)); brw_pop_insn_state(p); + brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1)); + brw_inst *insn = next_insn(p, BRW_OPCODE_SEND); + brw_inst_set_sfid(devinfo, insn, target_cache); + /* cast dest to a uword[8] vector */ dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW); @@ -2265,13 +2335,12 @@ void brw_oword_block_read(struct brw_codegen *p, brw_inst_set_base_mrf(devinfo, insn, mrf.nr); } - brw_set_dp_read_message(p, insn, bind_table_index, - BRW_DATAPORT_OWORD_BLOCK_DWORDS(exec_size), - BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, - target_cache, - 1, /* msg_length */ - true, /* header_present */ - DIV_ROUND_UP(exec_size, 8)); /* response_length */ + brw_set_desc(p, insn, + brw_message_desc(devinfo, 1, DIV_ROUND_UP(exec_size, 8), true) | + brw_dp_read_desc(devinfo, bind_table_index, + BRW_DATAPORT_OWORD_BLOCK_DWORDS(exec_size), + BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, + BRW_DATAPORT_READ_TARGET_DATA_CACHE)); brw_pop_insn_state(p); } @@ -2291,7 +2360,7 @@ brw_fb_WRITE(struct brw_codegen *p, const struct gen_device_info *devinfo = p->devinfo; const unsigned target_cache = (devinfo->gen >= 6 ? GEN6_SFID_DATAPORT_RENDER_CACHE : - BRW_DATAPORT_READ_TARGET_RENDER_CACHE); + BRW_SFID_DATAPORT_WRITE); brw_inst *insn; unsigned msg_type; struct brw_reg dest, src0; @@ -2306,6 +2375,7 @@ brw_fb_WRITE(struct brw_codegen *p, } else { insn = next_insn(p, BRW_OPCODE_SEND); } + brw_inst_set_sfid(devinfo, insn, target_cache); brw_inst_set_compression(devinfo, insn, false); if (devinfo->gen >= 6) { @@ -2323,18 +2393,13 @@ brw_fb_WRITE(struct brw_codegen *p, brw_set_dest(p, insn, dest); brw_set_src0(p, insn, src0); - brw_set_dp_write_message(p, - insn, - binding_table_index, - msg_control, - msg_type, - target_cache, - msg_length, - header_present, - last_render_target, - response_length, - eot, - 0 /* send_commit_msg */); + brw_set_desc(p, insn, + brw_message_desc(devinfo, msg_length, response_length, + header_present) | + brw_dp_write_desc(devinfo, binding_table_index, msg_control, + msg_type, last_render_target, + 0 /* send_commit_msg */)); + brw_inst_set_eot(devinfo, insn, eot); return insn; } @@ -2354,14 +2419,16 @@ gen9_fb_READ(struct brw_codegen *p, brw_get_default_exec_size(p) == BRW_EXECUTE_16 ? 0 : 1; brw_inst *insn = next_insn(p, BRW_OPCODE_SENDC); + brw_inst_set_sfid(devinfo, insn, GEN6_SFID_DATAPORT_RENDER_CACHE); brw_set_dest(p, insn, dst); brw_set_src0(p, insn, payload); - brw_set_dp_read_message(p, insn, binding_table_index, - per_sample << 5 | msg_subtype, - GEN9_DATAPORT_RC_RENDER_TARGET_READ, - GEN6_SFID_DATAPORT_RENDER_CACHE, - msg_length, true /* header_present */, - response_length); + brw_set_desc( + p, insn, + brw_message_desc(devinfo, msg_length, response_length, true) | + brw_dp_read_desc(devinfo, binding_table_index, + per_sample << 5 | msg_subtype, + GEN9_DATAPORT_RC_RENDER_TARGET_READ, + BRW_DATAPORT_READ_TARGET_RENDER_CACHE)); brw_inst_set_rt_slot_group(devinfo, insn, brw_get_default_group(p) / 16); return insn; @@ -2392,6 +2459,7 @@ void brw_SAMPLE(struct brw_codegen *p, gen6_resolve_implied_move(p, &src0, msg_reg_nr); insn = next_insn(p, BRW_OPCODE_SEND); + brw_inst_set_sfid(devinfo, insn, BRW_SFID_SAMPLER); brw_inst_set_pred_control(devinfo, insn, BRW_PREDICATE_NONE); /* XXX */ /* From the 965 PRM (volume 4, part 1, section 14.2.41): @@ -2413,15 +2481,11 @@ void brw_SAMPLE(struct brw_codegen *p, brw_set_dest(p, insn, dest); brw_set_src0(p, insn, src0); - brw_set_sampler_message(p, insn, - binding_table_index, - sampler, - msg_type, - response_length, - msg_length, - header_present, - simd_mode, - return_format); + brw_set_desc(p, insn, + brw_message_desc(devinfo, msg_length, response_length, + header_present) | + brw_sampler_desc(devinfo, binding_table_index, sampler, + msg_type, simd_mode, return_format)); } /* Adjust the message header's sampler state pointer to @@ -2461,12 +2525,15 @@ void brw_adjust_sampler_state_pointer(struct brw_codegen *p, struct brw_reg temp = get_element_ud(header, 3); + brw_push_insn_state(p); brw_AND(p, temp, get_element_ud(sampler_index, 0), brw_imm_ud(0x0f0)); + brw_set_default_swsb(p, tgl_swsb_regdist(1)); brw_SHL(p, temp, temp, brw_imm_ud(4)); brw_ADD(p, get_element_ud(header, 3), get_element_ud(brw_vec8_grf(0, 0), 3), temp); + brw_pop_insn_state(p); } } @@ -2522,34 +2589,28 @@ void brw_urb_WRITE(struct brw_codegen *p, swizzle); } -struct brw_inst * +void brw_send_indirect_message(struct brw_codegen *p, unsigned sfid, struct brw_reg dst, struct brw_reg payload, struct brw_reg desc, - unsigned desc_imm) + unsigned desc_imm, + bool eot) { const struct gen_device_info *devinfo = p->devinfo; struct brw_inst *send; - int setup; dst = retype(dst, BRW_REGISTER_TYPE_UW); assert(desc.type == BRW_REGISTER_TYPE_UD); - /* We hold on to the setup instruction (the SEND in the direct case, the OR - * in the indirect case) by its index in the instruction store. The - * pointer returned by next_insn() may become invalid if emitting the SEND - * in the indirect case reallocs the store. - */ - if (desc.file == BRW_IMMEDIATE_VALUE) { - setup = p->nr_insn; send = next_insn(p, BRW_OPCODE_SEND); + brw_set_src0(p, send, retype(payload, BRW_REGISTER_TYPE_UD)); brw_set_desc(p, send, desc.ud | desc_imm); - } else { + const struct tgl_swsb swsb = brw_get_default_swsb(p); struct brw_reg addr = retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD); brw_push_insn_state(p); @@ -2557,44 +2618,157 @@ brw_send_indirect_message(struct brw_codegen *p, brw_set_default_mask_control(p, BRW_MASK_DISABLE); brw_set_default_exec_size(p, BRW_EXECUTE_1); brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); + brw_set_default_swsb(p, tgl_swsb_src_dep(swsb)); /* Load the indirect descriptor to an address register using OR so the - * caller can specify additional descriptor bits with the usual - * brw_set_*_message() helper functions. + * caller can specify additional descriptor bits with the desc_imm + * immediate. */ - setup = p->nr_insn; brw_OR(p, addr, desc, brw_imm_ud(desc_imm)); brw_pop_insn_state(p); + brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1)); send = next_insn(p, BRW_OPCODE_SEND); - brw_set_src1(p, send, addr); - } + brw_set_src0(p, send, retype(payload, BRW_REGISTER_TYPE_UD)); - if (dst.width < BRW_EXECUTE_8) - brw_inst_set_exec_size(devinfo, send, dst.width); + if (devinfo->gen >= 12) + brw_inst_set_send_sel_reg32_desc(devinfo, send, true); + else + brw_set_src1(p, send, addr); + } brw_set_dest(p, send, dst); - brw_set_src0(p, send, retype(payload, BRW_REGISTER_TYPE_UD)); brw_inst_set_sfid(devinfo, send, sfid); + brw_inst_set_eot(devinfo, send, eot); +} + +void +brw_send_indirect_split_message(struct brw_codegen *p, + unsigned sfid, + struct brw_reg dst, + struct brw_reg payload0, + struct brw_reg payload1, + struct brw_reg desc, + unsigned desc_imm, + struct brw_reg ex_desc, + unsigned ex_desc_imm, + bool eot) +{ + const struct gen_device_info *devinfo = p->devinfo; + struct brw_inst *send; + + dst = retype(dst, BRW_REGISTER_TYPE_UW); + + assert(desc.type == BRW_REGISTER_TYPE_UD); + + if (desc.file == BRW_IMMEDIATE_VALUE) { + desc.ud |= desc_imm; + } else { + const struct tgl_swsb swsb = brw_get_default_swsb(p); + struct brw_reg addr = retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD); + + brw_push_insn_state(p); + brw_set_default_access_mode(p, BRW_ALIGN_1); + brw_set_default_mask_control(p, BRW_MASK_DISABLE); + brw_set_default_exec_size(p, BRW_EXECUTE_1); + brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); + brw_set_default_swsb(p, tgl_swsb_src_dep(swsb)); + + /* Load the indirect descriptor to an address register using OR so the + * caller can specify additional descriptor bits with the desc_imm + * immediate. + */ + brw_OR(p, addr, desc, brw_imm_ud(desc_imm)); + + brw_pop_insn_state(p); + desc = addr; + + brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1)); + } + + if (ex_desc.file == BRW_IMMEDIATE_VALUE && + (devinfo->gen >= 12 || (ex_desc.ud & INTEL_MASK(15, 12)) == 0)) { + ex_desc.ud |= ex_desc_imm; + } else { + const struct tgl_swsb swsb = brw_get_default_swsb(p); + struct brw_reg addr = retype(brw_address_reg(2), BRW_REGISTER_TYPE_UD); - return &p->store[setup]; + brw_push_insn_state(p); + brw_set_default_access_mode(p, BRW_ALIGN_1); + brw_set_default_mask_control(p, BRW_MASK_DISABLE); + brw_set_default_exec_size(p, BRW_EXECUTE_1); + brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); + brw_set_default_swsb(p, tgl_swsb_src_dep(swsb)); + + /* Load the indirect extended descriptor to an address register using OR + * so the caller can specify additional descriptor bits with the + * desc_imm immediate. + * + * Even though the instruction dispatcher always pulls the SFID and EOT + * fields from the instruction itself, actual external unit which + * processes the message gets the SFID and EOT from the extended + * descriptor which comes from the address register. If we don't OR + * those two bits in, the external unit may get confused and hang. + */ + unsigned imm_part = ex_desc_imm | sfid | eot << 5; + + if (ex_desc.file == BRW_IMMEDIATE_VALUE) { + /* ex_desc bits 15:12 don't exist in the instruction encoding prior + * to Gen12, so we may have fallen back to an indirect extended + * descriptor. + */ + brw_MOV(p, addr, brw_imm_ud(ex_desc.ud | imm_part)); + } else { + brw_OR(p, addr, ex_desc, brw_imm_ud(imm_part)); + } + + brw_pop_insn_state(p); + ex_desc = addr; + + brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1)); + } + + send = next_insn(p, devinfo->gen >= 12 ? BRW_OPCODE_SEND : BRW_OPCODE_SENDS); + brw_set_dest(p, send, dst); + brw_set_src0(p, send, retype(payload0, BRW_REGISTER_TYPE_UD)); + brw_set_src1(p, send, retype(payload1, BRW_REGISTER_TYPE_UD)); + + if (desc.file == BRW_IMMEDIATE_VALUE) { + brw_inst_set_send_sel_reg32_desc(devinfo, send, 0); + brw_inst_set_send_desc(devinfo, send, desc.ud); + } else { + assert(desc.file == BRW_ARCHITECTURE_REGISTER_FILE); + assert(desc.nr == BRW_ARF_ADDRESS); + assert(desc.subnr == 0); + brw_inst_set_send_sel_reg32_desc(devinfo, send, 1); + } + + if (ex_desc.file == BRW_IMMEDIATE_VALUE) { + brw_inst_set_send_sel_reg32_ex_desc(devinfo, send, 0); + brw_inst_set_sends_ex_desc(devinfo, send, ex_desc.ud); + } else { + assert(ex_desc.file == BRW_ARCHITECTURE_REGISTER_FILE); + assert(ex_desc.nr == BRW_ARF_ADDRESS); + assert((ex_desc.subnr & 0x3) == 0); + brw_inst_set_send_sel_reg32_ex_desc(devinfo, send, 1); + brw_inst_set_send_ex_desc_ia_subreg_nr(devinfo, send, ex_desc.subnr >> 2); + } + + brw_inst_set_sfid(devinfo, send, sfid); + brw_inst_set_eot(devinfo, send, eot); } -static struct brw_inst * +static void brw_send_indirect_surface_message(struct brw_codegen *p, unsigned sfid, struct brw_reg dst, struct brw_reg payload, struct brw_reg surface, - unsigned message_len, - unsigned response_len, - bool header_present) + unsigned desc_imm) { - const struct gen_device_info *devinfo = p->devinfo; - struct brw_inst *insn; - if (surface.file != BRW_IMMEDIATE_VALUE) { + const struct tgl_swsb swsb = brw_get_default_swsb(p); struct brw_reg addr = retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD); brw_push_insn_state(p); @@ -2602,26 +2776,23 @@ brw_send_indirect_surface_message(struct brw_codegen *p, brw_set_default_mask_control(p, BRW_MASK_DISABLE); brw_set_default_exec_size(p, BRW_EXECUTE_1); brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); + brw_set_default_swsb(p, tgl_swsb_src_dep(swsb)); /* Mask out invalid bits from the surface index to avoid hangs e.g. when * some surface array is accessed out of bounds. */ - insn = brw_AND(p, addr, - suboffset(vec1(retype(surface, BRW_REGISTER_TYPE_UD)), - BRW_GET_SWZ(surface.swizzle, 0)), - brw_imm_ud(0xff)); + brw_AND(p, addr, + suboffset(vec1(retype(surface, BRW_REGISTER_TYPE_UD)), + BRW_GET_SWZ(surface.swizzle, 0)), + brw_imm_ud(0xff)); brw_pop_insn_state(p); surface = addr; + brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1)); } - insn = brw_send_indirect_message(p, sfid, dst, payload, surface, 0); - brw_inst_set_mlen(devinfo, insn, message_len); - brw_inst_set_rlen(devinfo, insn, response_len); - brw_inst_set_header_present(devinfo, insn, header_present); - - return insn; + brw_send_indirect_message(p, sfid, dst, payload, surface, desc_imm, false); } static bool @@ -2670,6 +2841,8 @@ brw_find_next_block_end(struct brw_codegen *p, int start_offset) case BRW_OPCODE_HALT: if (depth == 0) return offset; + default: + break; } } @@ -2775,6 +2948,9 @@ brw_set_uip_jip(struct brw_codegen *p, int start_offset) assert(brw_inst_uip(devinfo, insn) != 0); assert(brw_inst_jip(devinfo, insn) != 0); break; + + default: + break; } } } @@ -2830,73 +3006,35 @@ brw_svb_write(struct brw_codegen *p, const unsigned target_cache = (devinfo->gen >= 7 ? GEN7_SFID_DATAPORT_DATA_CACHE : devinfo->gen >= 6 ? GEN6_SFID_DATAPORT_RENDER_CACHE : - BRW_DATAPORT_READ_TARGET_RENDER_CACHE); + BRW_SFID_DATAPORT_WRITE); brw_inst *insn; gen6_resolve_implied_move(p, &src0, msg_reg_nr); insn = next_insn(p, BRW_OPCODE_SEND); + brw_inst_set_sfid(devinfo, insn, target_cache); brw_set_dest(p, insn, dest); brw_set_src0(p, insn, src0); - brw_set_src1(p, insn, brw_imm_d(0)); - brw_set_dp_write_message(p, insn, - binding_table_index, - 0, /* msg_control: ignored */ - GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE, - target_cache, - 1, /* msg_length */ - true, /* header_present */ - 0, /* last_render_target: ignored */ - send_commit_msg, /* response_length */ - 0, /* end_of_thread */ - send_commit_msg); /* send_commit_msg */ + brw_set_desc(p, insn, + brw_message_desc(devinfo, 1, send_commit_msg, true) | + brw_dp_write_desc(devinfo, binding_table_index, + 0, /* msg_control: ignored */ + GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE, + 0, /* last_render_target: ignored */ + send_commit_msg)); /* send_commit_msg */ } static unsigned brw_surface_payload_size(struct brw_codegen *p, unsigned num_channels, - bool has_simd4x2, - bool has_simd16) + unsigned exec_size /**< 0 for SIMD4x2 */) { - if (has_simd4x2 && brw_get_default_access_mode(p) == BRW_ALIGN_16) - return 1; - else if (has_simd16 && brw_get_default_exec_size(p) == BRW_EXECUTE_16) - return 2 * num_channels; - else + if (exec_size == 0) + return 1; /* SIMD4x2 */ + else if (exec_size <= 8) return num_channels; -} - -static void -brw_set_dp_untyped_atomic_message(struct brw_codegen *p, - brw_inst *insn, - unsigned atomic_op, - bool response_expected) -{ - const struct gen_device_info *devinfo = p->devinfo; - unsigned msg_control = - atomic_op | /* Atomic Operation Type: BRW_AOP_* */ - (response_expected ? 1 << 5 : 0); /* Return data expected */ - - if (devinfo->gen >= 8 || devinfo->is_haswell) { - if (brw_get_default_access_mode(p) == BRW_ALIGN_1) { - if (brw_get_default_exec_size(p) != BRW_EXECUTE_16) - msg_control |= 1 << 4; /* SIMD8 mode */ - - brw_inst_set_dp_msg_type(devinfo, insn, - HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP); - } else { - brw_inst_set_dp_msg_type(devinfo, insn, - HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2); - } - } else { - brw_inst_set_dp_msg_type(devinfo, insn, - GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP); - - if (brw_get_default_exec_size(p) != BRW_EXECUTE_16) - msg_control |= 1 << 4; /* SIMD8 mode */ - } - - brw_inst_set_dp_msg_control(devinfo, insn, msg_control); + else + return 2 * num_channels; } void @@ -2914,6 +3052,16 @@ brw_untyped_atomic(struct brw_codegen *p, HSW_SFID_DATAPORT_DATA_CACHE_1 : GEN7_SFID_DATAPORT_DATA_CACHE); const bool align1 = brw_get_default_access_mode(p) == BRW_ALIGN_1; + /* SIMD4x2 untyped atomic instructions only exist on HSW+ */ + const bool has_simd4x2 = devinfo->gen >= 8 || devinfo->is_haswell; + const unsigned exec_size = align1 ? 1 << brw_get_default_exec_size(p) : + has_simd4x2 ? 0 : 8; + const unsigned response_length = + brw_surface_payload_size(p, response_expected, exec_size); + const unsigned desc = + brw_message_desc(devinfo, msg_length, response_length, header_present) | + brw_dp_untyped_atomic_desc(devinfo, exec_size, atomic_op, + response_expected); /* Mask out unused components -- This is especially important in Align16 * mode on generations that don't have native support for SIMD4x2 atomics, * because unused but enabled components will cause the dataport to perform @@ -2921,37 +3069,9 @@ brw_untyped_atomic(struct brw_codegen *p, * uninitialized Y, Z and W coordinates of the payload. */ const unsigned mask = align1 ? WRITEMASK_XYZW : WRITEMASK_X; - struct brw_inst *insn = brw_send_indirect_surface_message( - p, sfid, brw_writemask(dst, mask), payload, surface, msg_length, - brw_surface_payload_size(p, response_expected, - devinfo->gen >= 8 || devinfo->is_haswell, true), - header_present); - brw_set_dp_untyped_atomic_message( - p, insn, atomic_op, response_expected); -} - -static void -brw_set_dp_untyped_surface_read_message(struct brw_codegen *p, - struct brw_inst *insn, - unsigned num_channels) -{ - const struct gen_device_info *devinfo = p->devinfo; - /* Set mask of 32-bit channels to drop. */ - unsigned msg_control = 0xf & (0xf << num_channels); - - if (brw_get_default_access_mode(p) == BRW_ALIGN_1) { - if (brw_get_default_exec_size(p) == BRW_EXECUTE_16) - msg_control |= 1 << 4; /* SIMD16 mode */ - else - msg_control |= 2 << 4; /* SIMD8 mode */ - } - - brw_inst_set_dp_msg_type(devinfo, insn, - (devinfo->gen >= 8 || devinfo->is_haswell ? - HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ : - GEN7_DATAPORT_DC_UNTYPED_SURFACE_READ)); - brw_inst_set_dp_msg_control(devinfo, insn, msg_control); + brw_send_indirect_surface_message(p, sfid, brw_writemask(dst, mask), + payload, surface, desc); } void @@ -2966,41 +3086,15 @@ brw_untyped_surface_read(struct brw_codegen *p, const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ? HSW_SFID_DATAPORT_DATA_CACHE_1 : GEN7_SFID_DATAPORT_DATA_CACHE); - struct brw_inst *insn = brw_send_indirect_surface_message( - p, sfid, dst, payload, surface, msg_length, - brw_surface_payload_size(p, num_channels, true, true), - false); - - brw_set_dp_untyped_surface_read_message( - p, insn, num_channels); -} - -static void -brw_set_dp_untyped_surface_write_message(struct brw_codegen *p, - struct brw_inst *insn, - unsigned num_channels) -{ - const struct gen_device_info *devinfo = p->devinfo; - /* Set mask of 32-bit channels to drop. */ - unsigned msg_control = 0xf & (0xf << num_channels); - - if (brw_get_default_access_mode(p) == BRW_ALIGN_1) { - if (brw_get_default_exec_size(p) == BRW_EXECUTE_16) - msg_control |= 1 << 4; /* SIMD16 mode */ - else - msg_control |= 2 << 4; /* SIMD8 mode */ - } else { - if (devinfo->gen >= 8 || devinfo->is_haswell) - msg_control |= 0 << 4; /* SIMD4x2 mode */ - else - msg_control |= 2 << 4; /* SIMD8 mode */ - } + const bool align1 = brw_get_default_access_mode(p) == BRW_ALIGN_1; + const unsigned exec_size = align1 ? 1 << brw_get_default_exec_size(p) : 0; + const unsigned response_length = + brw_surface_payload_size(p, num_channels, exec_size); + const unsigned desc = + brw_message_desc(devinfo, msg_length, response_length, false) | + brw_dp_untyped_surface_rw_desc(devinfo, exec_size, num_channels, false); - brw_inst_set_dp_msg_type(devinfo, insn, - devinfo->gen >= 8 || devinfo->is_haswell ? - HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_WRITE : - GEN7_DATAPORT_DC_UNTYPED_SURFACE_WRITE); - brw_inst_set_dp_msg_control(devinfo, insn, msg_control); + brw_send_indirect_surface_message(p, sfid, dst, payload, surface, desc); } void @@ -3016,271 +3110,26 @@ brw_untyped_surface_write(struct brw_codegen *p, HSW_SFID_DATAPORT_DATA_CACHE_1 : GEN7_SFID_DATAPORT_DATA_CACHE); const bool align1 = brw_get_default_access_mode(p) == BRW_ALIGN_1; + /* SIMD4x2 untyped surface write instructions only exist on HSW+ */ + const bool has_simd4x2 = devinfo->gen >= 8 || devinfo->is_haswell; + const unsigned exec_size = align1 ? 1 << brw_get_default_exec_size(p) : + has_simd4x2 ? 0 : 8; + const unsigned desc = + brw_message_desc(devinfo, msg_length, 0, header_present) | + brw_dp_untyped_surface_rw_desc(devinfo, exec_size, num_channels, true); /* Mask out unused components -- See comment in brw_untyped_atomic(). */ - const unsigned mask = devinfo->gen == 7 && !devinfo->is_haswell && !align1 ? - WRITEMASK_X : WRITEMASK_XYZW; - struct brw_inst *insn = brw_send_indirect_surface_message( - p, sfid, brw_writemask(brw_null_reg(), mask), - payload, surface, msg_length, 0, header_present); - - brw_set_dp_untyped_surface_write_message( - p, insn, num_channels); -} - -static unsigned -brw_byte_scattered_data_element_from_bit_size(unsigned bit_size) -{ - switch (bit_size) { - case 8: - return GEN7_BYTE_SCATTERED_DATA_ELEMENT_BYTE; - case 16: - return GEN7_BYTE_SCATTERED_DATA_ELEMENT_WORD; - case 32: - return GEN7_BYTE_SCATTERED_DATA_ELEMENT_DWORD; - default: - unreachable("Unsupported bit_size for byte scattered messages"); - } -} - - -void -brw_byte_scattered_read(struct brw_codegen *p, - struct brw_reg dst, - struct brw_reg payload, - struct brw_reg surface, - unsigned msg_length, - unsigned bit_size) -{ - const struct gen_device_info *devinfo = p->devinfo; - assert(devinfo->gen > 7 || devinfo->is_haswell); - assert(brw_get_default_access_mode(p) == BRW_ALIGN_1); - const unsigned sfid = GEN7_SFID_DATAPORT_DATA_CACHE; - - struct brw_inst *insn = brw_send_indirect_surface_message( - p, sfid, dst, payload, surface, msg_length, - brw_surface_payload_size(p, 1, true, true), - false); - - unsigned msg_control = - brw_byte_scattered_data_element_from_bit_size(bit_size) << 2; - - if (brw_get_default_exec_size(p) == BRW_EXECUTE_16) - msg_control |= 1; /* SIMD16 mode */ - else - msg_control |= 0; /* SIMD8 mode */ - - brw_inst_set_dp_msg_type(devinfo, insn, - HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_READ); - brw_inst_set_dp_msg_control(devinfo, insn, msg_control); -} - -void -brw_byte_scattered_write(struct brw_codegen *p, - struct brw_reg payload, - struct brw_reg surface, - unsigned msg_length, - unsigned bit_size, - bool header_present) -{ - const struct gen_device_info *devinfo = p->devinfo; - assert(devinfo->gen > 7 || devinfo->is_haswell); - assert(brw_get_default_access_mode(p) == BRW_ALIGN_1); - const unsigned sfid = GEN7_SFID_DATAPORT_DATA_CACHE; - - struct brw_inst *insn = brw_send_indirect_surface_message( - p, sfid, brw_writemask(brw_null_reg(), WRITEMASK_XYZW), - payload, surface, msg_length, 0, header_present); - - unsigned msg_control = - brw_byte_scattered_data_element_from_bit_size(bit_size) << 2; - - if (brw_get_default_exec_size(p) == BRW_EXECUTE_16) - msg_control |= 1; - else - msg_control |= 0; - - brw_inst_set_dp_msg_type(devinfo, insn, - HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_WRITE); - brw_inst_set_dp_msg_control(devinfo, insn, msg_control); -} - -static void -brw_set_dp_typed_atomic_message(struct brw_codegen *p, - struct brw_inst *insn, - unsigned atomic_op, - bool response_expected) -{ - const struct gen_device_info *devinfo = p->devinfo; - unsigned msg_control = - atomic_op | /* Atomic Operation Type: BRW_AOP_* */ - (response_expected ? 1 << 5 : 0); /* Return data expected */ - - if (devinfo->gen >= 8 || devinfo->is_haswell) { - if (brw_get_default_access_mode(p) == BRW_ALIGN_1) { - if ((brw_get_default_group(p) / 8) % 2 == 1) - msg_control |= 1 << 4; /* Use high 8 slots of the sample mask */ - - brw_inst_set_dp_msg_type(devinfo, insn, - HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP); - } else { - brw_inst_set_dp_msg_type(devinfo, insn, - HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP_SIMD4X2); - } - - } else { - brw_inst_set_dp_msg_type(devinfo, insn, - GEN7_DATAPORT_RC_TYPED_ATOMIC_OP); - - if ((brw_get_default_group(p) / 8) % 2 == 1) - msg_control |= 1 << 4; /* Use high 8 slots of the sample mask */ - } - - brw_inst_set_dp_msg_control(devinfo, insn, msg_control); -} - -void -brw_typed_atomic(struct brw_codegen *p, - struct brw_reg dst, - struct brw_reg payload, - struct brw_reg surface, - unsigned atomic_op, - unsigned msg_length, - bool response_expected, - bool header_present) { - const struct gen_device_info *devinfo = p->devinfo; - const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ? - HSW_SFID_DATAPORT_DATA_CACHE_1 : - GEN6_SFID_DATAPORT_RENDER_CACHE); - const bool align1 = brw_get_default_access_mode(p) == BRW_ALIGN_1; - /* Mask out unused components -- See comment in brw_untyped_atomic(). */ - const unsigned mask = align1 ? WRITEMASK_XYZW : WRITEMASK_X; - struct brw_inst *insn = brw_send_indirect_surface_message( - p, sfid, brw_writemask(dst, mask), payload, surface, msg_length, - brw_surface_payload_size(p, response_expected, - devinfo->gen >= 8 || devinfo->is_haswell, false), - header_present); - - brw_set_dp_typed_atomic_message( - p, insn, atomic_op, response_expected); -} - -static void -brw_set_dp_typed_surface_read_message(struct brw_codegen *p, - struct brw_inst *insn, - unsigned num_channels) -{ - const struct gen_device_info *devinfo = p->devinfo; - /* Set mask of unused channels. */ - unsigned msg_control = 0xf & (0xf << num_channels); - - if (devinfo->gen >= 8 || devinfo->is_haswell) { - if (brw_get_default_access_mode(p) == BRW_ALIGN_1) { - if ((brw_get_default_group(p) / 8) % 2 == 1) - msg_control |= 2 << 4; /* Use high 8 slots of the sample mask */ - else - msg_control |= 1 << 4; /* Use low 8 slots of the sample mask */ - } - - brw_inst_set_dp_msg_type(devinfo, insn, - HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_READ); - } else { - if (brw_get_default_access_mode(p) == BRW_ALIGN_1) { - if ((brw_get_default_group(p) / 8) % 2 == 1) - msg_control |= 1 << 5; /* Use high 8 slots of the sample mask */ - } - - brw_inst_set_dp_msg_type(devinfo, insn, - GEN7_DATAPORT_RC_TYPED_SURFACE_READ); - } + const unsigned mask = !has_simd4x2 && !align1 ? WRITEMASK_X : WRITEMASK_XYZW; - brw_inst_set_dp_msg_control(devinfo, insn, msg_control); -} - -void -brw_typed_surface_read(struct brw_codegen *p, - struct brw_reg dst, - struct brw_reg payload, - struct brw_reg surface, - unsigned msg_length, - unsigned num_channels, - bool header_present) -{ - const struct gen_device_info *devinfo = p->devinfo; - const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ? - HSW_SFID_DATAPORT_DATA_CACHE_1 : - GEN6_SFID_DATAPORT_RENDER_CACHE); - struct brw_inst *insn = brw_send_indirect_surface_message( - p, sfid, dst, payload, surface, msg_length, - brw_surface_payload_size(p, num_channels, - devinfo->gen >= 8 || devinfo->is_haswell, false), - header_present); - - brw_set_dp_typed_surface_read_message( - p, insn, num_channels); -} - -static void -brw_set_dp_typed_surface_write_message(struct brw_codegen *p, - struct brw_inst *insn, - unsigned num_channels) -{ - const struct gen_device_info *devinfo = p->devinfo; - /* Set mask of unused channels. */ - unsigned msg_control = 0xf & (0xf << num_channels); - - if (devinfo->gen >= 8 || devinfo->is_haswell) { - if (brw_get_default_access_mode(p) == BRW_ALIGN_1) { - if ((brw_get_default_group(p) / 8) % 2 == 1) - msg_control |= 2 << 4; /* Use high 8 slots of the sample mask */ - else - msg_control |= 1 << 4; /* Use low 8 slots of the sample mask */ - } - - brw_inst_set_dp_msg_type(devinfo, insn, - HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_WRITE); - - } else { - if (brw_get_default_access_mode(p) == BRW_ALIGN_1) { - if ((brw_get_default_group(p) / 8) % 2 == 1) - msg_control |= 1 << 5; /* Use high 8 slots of the sample mask */ - } - - brw_inst_set_dp_msg_type(devinfo, insn, - GEN7_DATAPORT_RC_TYPED_SURFACE_WRITE); - } - - brw_inst_set_dp_msg_control(devinfo, insn, msg_control); -} - -void -brw_typed_surface_write(struct brw_codegen *p, - struct brw_reg payload, - struct brw_reg surface, - unsigned msg_length, - unsigned num_channels, - bool header_present) -{ - const struct gen_device_info *devinfo = p->devinfo; - const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ? - HSW_SFID_DATAPORT_DATA_CACHE_1 : - GEN6_SFID_DATAPORT_RENDER_CACHE); - const bool align1 = brw_get_default_access_mode(p) == BRW_ALIGN_1; - /* Mask out unused components -- See comment in brw_untyped_atomic(). */ - const unsigned mask = (devinfo->gen == 7 && !devinfo->is_haswell && !align1 ? - WRITEMASK_X : WRITEMASK_XYZW); - struct brw_inst *insn = brw_send_indirect_surface_message( - p, sfid, brw_writemask(brw_null_reg(), mask), - payload, surface, msg_length, 0, header_present); - - brw_set_dp_typed_surface_write_message( - p, insn, num_channels); + brw_send_indirect_surface_message(p, sfid, brw_writemask(brw_null_reg(), mask), + payload, surface, desc); } static void brw_set_memory_fence_message(struct brw_codegen *p, struct brw_inst *insn, enum brw_message_target sfid, - bool commit_enable) + bool commit_enable, + unsigned bti) { const struct gen_device_info *devinfo = p->devinfo; @@ -3302,54 +3151,34 @@ brw_set_memory_fence_message(struct brw_codegen *p, if (commit_enable) brw_inst_set_dp_msg_control(devinfo, insn, 1 << 5); + + assert(devinfo->gen >= 11 || bti == 0); + brw_inst_set_binding_table_index(devinfo, insn, bti); } void brw_memory_fence(struct brw_codegen *p, struct brw_reg dst, - enum opcode send_op) + struct brw_reg src, + enum opcode send_op, + enum brw_message_target sfid, + bool commit_enable, + unsigned bti) { const struct gen_device_info *devinfo = p->devinfo; - const bool commit_enable = - devinfo->gen >= 10 || /* HSD ES # 1404612949 */ - (devinfo->gen == 7 && !devinfo->is_haswell); - struct brw_inst *insn; - brw_push_insn_state(p); - brw_set_default_mask_control(p, BRW_MASK_DISABLE); - brw_set_default_exec_size(p, BRW_EXECUTE_1); - dst = vec1(dst); + dst = retype(vec1(dst), BRW_REGISTER_TYPE_UW); + src = retype(vec1(src), BRW_REGISTER_TYPE_UD); /* Set dst as destination for dependency tracking, the MEMORY_FENCE * message doesn't write anything back. */ - insn = next_insn(p, send_op); - dst = retype(dst, BRW_REGISTER_TYPE_UW); + struct brw_inst *insn = next_insn(p, send_op); + brw_inst_set_mask_control(devinfo, insn, BRW_MASK_DISABLE); + brw_inst_set_exec_size(devinfo, insn, BRW_EXECUTE_1); brw_set_dest(p, insn, dst); - brw_set_src0(p, insn, dst); - brw_set_memory_fence_message(p, insn, GEN7_SFID_DATAPORT_DATA_CACHE, - commit_enable); - - if (devinfo->gen == 7 && !devinfo->is_haswell) { - /* IVB does typed surface access through the render cache, so we need to - * flush it too. Use a different register so both flushes can be - * pipelined by the hardware. - */ - insn = next_insn(p, send_op); - brw_set_dest(p, insn, offset(dst, 1)); - brw_set_src0(p, insn, offset(dst, 1)); - brw_set_memory_fence_message(p, insn, GEN6_SFID_DATAPORT_RENDER_CACHE, - commit_enable); - - /* Now write the response of the second message into the response of the - * first to trigger a pipeline stall -- This way future render and data - * cache messages will be properly ordered with respect to past data and - * render cache messages. - */ - brw_MOV(p, dst, offset(dst, 1)); - } - - brw_pop_insn_state(p); + brw_set_src0(p, insn, src); + brw_set_memory_fence_message(p, insn, sfid, commit_enable, bti); } void @@ -3363,25 +3192,24 @@ brw_pixel_interpolator_query(struct brw_codegen *p, unsigned response_length) { const struct gen_device_info *devinfo = p->devinfo; - struct brw_inst *insn; const uint16_t exec_size = brw_get_default_exec_size(p); - const uint16_t qtr_ctrl = brw_get_default_group(p) / 8; + const unsigned slot_group = brw_get_default_group(p) / 16; + const unsigned simd_mode = (exec_size == BRW_EXECUTE_16); + const unsigned desc = + brw_message_desc(devinfo, msg_length, response_length, false) | + brw_pixel_interp_desc(devinfo, mode, noperspective, simd_mode, + slot_group); /* brw_send_indirect_message will automatically use a direct send message * if data is actually immediate. */ - insn = brw_send_indirect_message(p, - GEN7_SFID_PIXEL_INTERPOLATOR, - dest, - mrf, - vec1(data), 0); - brw_inst_set_mlen(devinfo, insn, msg_length); - brw_inst_set_rlen(devinfo, insn, response_length); - - brw_inst_set_pi_simd_mode(devinfo, insn, exec_size == BRW_EXECUTE_16); - brw_inst_set_pi_slot_group(devinfo, insn, qtr_ctrl / 2); - brw_inst_set_pi_nopersp(devinfo, insn, noperspective); - brw_inst_set_pi_message_type(devinfo, insn, mode); + brw_send_indirect_message(p, + GEN7_SFID_PIXEL_INTERPOLATOR, + dest, + mrf, + vec1(data), + desc, + false); } void @@ -3398,6 +3226,14 @@ brw_find_live_channel(struct brw_codegen *p, struct brw_reg dst, brw_push_insn_state(p); + /* The flag register is only used on Gen7 in align1 mode, so avoid setting + * unnecessary bits in the instruction words, get the information we need + * and reset the default flag register. This allows more instructions to be + * compacted. + */ + const unsigned flag_subreg = p->current->flag_subreg; + brw_set_default_flag_reg(p, 0, 0); + if (brw_get_default_access_mode(p) == BRW_ALIGN_1) { brw_set_default_mask_control(p, BRW_MASK_DISABLE); @@ -3421,6 +3257,7 @@ brw_find_live_channel(struct brw_codegen *p, struct brw_reg dst, * hardware. */ brw_SHR(p, vec1(dst), mask, brw_imm_ud(qtr_control * 8)); + brw_set_default_swsb(p, tgl_swsb_regdist(1)); brw_AND(p, vec1(dst), exec_mask, vec1(dst)); exec_mask = vec1(dst); } @@ -3431,8 +3268,7 @@ brw_find_live_channel(struct brw_codegen *p, struct brw_reg dst, */ inst = brw_FBL(p, vec1(dst), exec_mask); } else { - const struct brw_reg flag = brw_flag_reg(p->current->flag_subreg / 2, - p->current->flag_subreg % 2); + const struct brw_reg flag = brw_flag_subreg(flag_subreg); brw_set_default_exec_size(p, BRW_EXECUTE_1); brw_MOV(p, retype(flag, BRW_REGISTER_TYPE_UD), brw_imm_ud(0)); @@ -3452,6 +3288,8 @@ brw_find_live_channel(struct brw_codegen *p, struct brw_reg dst, brw_inst_set_group(devinfo, inst, lower_size * i + 8 * qtr_control); brw_inst_set_cond_modifier(devinfo, inst, BRW_CONDITIONAL_Z); brw_inst_set_exec_size(devinfo, inst, cvt(lower_size) - 1); + brw_inst_set_flag_reg_nr(devinfo, inst, flag_subreg / 2); + brw_inst_set_flag_subreg_nr(devinfo, inst, flag_subreg % 2); } /* Find the first bit set in the exec_size-wide portion of the flag @@ -3555,7 +3393,7 @@ brw_broadcast(struct brw_codegen *p, /* Take into account the component size and horizontal stride. */ assert(src.vstride == src.hstride + src.width); brw_SHL(p, addr, vec1(idx), - brw_imm_ud(_mesa_logbase2(type_sz(src.type)) + + brw_imm_ud(util_logbase2(type_sz(src.type)) + src.hstride - 1)); /* We can only address up to limit bytes using the indirect @@ -3563,12 +3401,15 @@ brw_broadcast(struct brw_codegen *p, * register is above this limit. */ if (offset >= limit) { + brw_set_default_swsb(p, tgl_swsb_regdist(1)); brw_ADD(p, addr, addr, brw_imm_ud(offset - offset % limit)); offset = offset % limit; } brw_pop_insn_state(p); + brw_set_default_swsb(p, tgl_swsb_regdist(1)); + /* Use indirect addressing to fetch the specified component. */ if (type_sz(src.type) > 4 && (devinfo->is_cherryview || gen_device_info_is_9lp(devinfo))) { @@ -3587,6 +3428,7 @@ brw_broadcast(struct brw_codegen *p, brw_MOV(p, subscript(dst, BRW_REGISTER_TYPE_D, 0), retype(brw_vec1_indirect(addr.subnr, offset), BRW_REGISTER_TYPE_D)); + brw_set_default_swsb(p, tgl_swsb_null()); brw_MOV(p, subscript(dst, BRW_REGISTER_TYPE_D, 1), retype(brw_vec1_indirect(addr.subnr, offset + 4), BRW_REGISTER_TYPE_D)); @@ -3656,11 +3498,12 @@ void brw_shader_time_add(struct brw_codegen *p, BRW_ARF_NULL, 0)); brw_set_src0(p, send, brw_vec1_reg(payload.file, payload.nr, 0)); - brw_set_src1(p, send, brw_imm_ud(0)); - brw_set_desc(p, send, brw_message_desc(devinfo, 2, 0, false)); + brw_set_desc(p, send, (brw_message_desc(devinfo, 2, 0, false) | + brw_dp_untyped_atomic_desc(devinfo, 1, BRW_AOP_ADD, + false))); + brw_inst_set_sfid(devinfo, send, sfid); brw_inst_set_binding_table_index(devinfo, send, surf_index); - brw_set_dp_untyped_atomic_message(p, send, BRW_AOP_ADD, false); brw_pop_insn_state(p); } @@ -3686,7 +3529,6 @@ brw_barrier(struct brw_codegen *p, struct brw_reg src) brw_set_desc(p, inst, brw_message_desc(devinfo, 1, 0, false)); brw_inst_set_sfid(devinfo, inst, BRW_SFID_MESSAGE_GATEWAY); - brw_inst_set_gateway_notify(devinfo, inst, 1); brw_inst_set_gateway_subfuncid(devinfo, inst, BRW_MESSAGE_GATEWAY_SFID_BARRIER_MSG); @@ -3715,37 +3557,35 @@ brw_WAIT(struct brw_codegen *p) brw_inst_set_mask_control(devinfo, insn, BRW_MASK_DISABLE); } -/** - * Changes the floating point rounding mode updating the control register - * field defined at cr0.0[5-6] bits. This function supports the changes to - * RTNE (00), RU (01), RD (10) and RTZ (11) rounding using bitwise operations. - * Only RTNE and RTZ rounding are enabled at nir. - */ void -brw_rounding_mode(struct brw_codegen *p, - enum brw_rnd_mode mode) -{ - const unsigned bits = mode << BRW_CR0_RND_MODE_SHIFT; - - if (bits != BRW_CR0_RND_MODE_MASK) { - brw_inst *inst = brw_AND(p, brw_cr0_reg(0), brw_cr0_reg(0), - brw_imm_ud(~BRW_CR0_RND_MODE_MASK)); - brw_inst_set_exec_size(p->devinfo, inst, BRW_EXECUTE_1); - - /* From the Skylake PRM, Volume 7, page 760: - * "Implementation Restriction on Register Access: When the control - * register is used as an explicit source and/or destination, hardware - * does not ensure execution pipeline coherency. Software must set the - * thread control field to ‘switch’ for an instruction that uses - * control register as an explicit operand." - */ - brw_inst_set_thread_control(p->devinfo, inst, BRW_THREAD_SWITCH); - } +brw_float_controls_mode(struct brw_codegen *p, + unsigned mode, unsigned mask) +{ + /* From the Skylake PRM, Volume 7, page 760: + * "Implementation Restriction on Register Access: When the control + * register is used as an explicit source and/or destination, hardware + * does not ensure execution pipeline coherency. Software must set the + * thread control field to ‘switch’ for an instruction that uses + * control register as an explicit operand." + * + * On Gen12+ this is implemented in terms of SWSB annotations instead. + */ + brw_set_default_swsb(p, tgl_swsb_regdist(1)); - if (bits) { - brw_inst *inst = brw_OR(p, brw_cr0_reg(0), brw_cr0_reg(0), - brw_imm_ud(bits)); - brw_inst_set_exec_size(p->devinfo, inst, BRW_EXECUTE_1); + brw_inst *inst = brw_AND(p, brw_cr0_reg(0), brw_cr0_reg(0), + brw_imm_ud(~mask)); + brw_inst_set_exec_size(p->devinfo, inst, BRW_EXECUTE_1); + if (p->devinfo->gen < 12) brw_inst_set_thread_control(p->devinfo, inst, BRW_THREAD_SWITCH); + + if (mode) { + brw_inst *inst_or = brw_OR(p, brw_cr0_reg(0), brw_cr0_reg(0), + brw_imm_ud(mode)); + brw_inst_set_exec_size(p->devinfo, inst_or, BRW_EXECUTE_1); + if (p->devinfo->gen < 12) + brw_inst_set_thread_control(p->devinfo, inst_or, BRW_THREAD_SWITCH); } + + if (p->devinfo->gen >= 12) + brw_SYNC(p, TGL_SYNC_NOP); }