X-Git-Url: https://git.libre-soc.org/?p=mesa.git;a=blobdiff_plain;f=src%2Fintel%2Fcompiler%2Fbrw_eu_emit.c;h=0d5c755f9e8d36e9ee1b7db70af4b2b63fc9aed6;hp=5fd9a2b3fe1edb286ad776ff7c5dac93ce0fa006;hb=c883c482be2ab4ab65e4905a447a0edf3e18418b;hpb=e15dac319b6cde33adc203fea2e9e31f966d20ca diff --git a/src/intel/compiler/brw_eu_emit.c b/src/intel/compiler/brw_eu_emit.c index 5fd9a2b3fe1..0d5c755f9e8 100644 --- a/src/intel/compiler/brw_eu_emit.c +++ b/src/intel/compiler/brw_eu_emit.c @@ -55,6 +55,7 @@ gen6_resolve_implied_move(struct brw_codegen *p, return; if (src->file != BRW_ARCHITECTURE_REGISTER_FILE || src->nr != BRW_ARF_NULL) { + assert(devinfo->gen < 12); brw_push_insn_state(p); brw_set_default_exec_size(p, BRW_EXECUTE_8); brw_set_default_mask_control(p, BRW_MASK_DISABLE); @@ -91,72 +92,117 @@ brw_set_dest(struct brw_codegen *p, brw_inst *inst, struct brw_reg dest) if (dest.file == BRW_MESSAGE_REGISTER_FILE) assert((dest.nr & ~BRW_MRF_COMPR4) < BRW_MAX_MRF(devinfo->gen)); - else if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE) + else if (dest.file == BRW_GENERAL_REGISTER_FILE) assert(dest.nr < 128); - gen7_convert_mrf_to_grf(p, &dest); + /* The hardware has a restriction where a destination of size Byte with + * a stride of 1 is only allowed for a packed byte MOV. For any other + * instruction, the stride must be at least 2, even when the destination + * is the NULL register. 
+ */ + if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE && + dest.nr == BRW_ARF_NULL && + type_sz(dest.type) == 1 && + dest.hstride == BRW_HORIZONTAL_STRIDE_1) { + dest.hstride = BRW_HORIZONTAL_STRIDE_2; + } - brw_inst_set_dst_file_type(devinfo, inst, dest.file, dest.type); - brw_inst_set_dst_address_mode(devinfo, inst, dest.address_mode); + gen7_convert_mrf_to_grf(p, &dest); - if (dest.address_mode == BRW_ADDRESS_DIRECT) { + if (devinfo->gen >= 12 && + (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND || + brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDC)) { + assert(dest.file == BRW_GENERAL_REGISTER_FILE || + dest.file == BRW_ARCHITECTURE_REGISTER_FILE); + assert(dest.address_mode == BRW_ADDRESS_DIRECT); + assert(dest.subnr == 0); + assert(brw_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1 || + (dest.hstride == BRW_HORIZONTAL_STRIDE_1 && + dest.vstride == dest.width + 1)); + assert(!dest.negate && !dest.abs); + brw_inst_set_dst_reg_file(devinfo, inst, dest.file); brw_inst_set_dst_da_reg_nr(devinfo, inst, dest.nr); - if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { - brw_inst_set_dst_da1_subreg_nr(devinfo, inst, dest.subnr); - if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) - dest.hstride = BRW_HORIZONTAL_STRIDE_1; - brw_inst_set_dst_hstride(devinfo, inst, dest.hstride); - } else { - brw_inst_set_dst_da16_subreg_nr(devinfo, inst, dest.subnr / 16); - brw_inst_set_da16_writemask(devinfo, inst, dest.writemask); - if (dest.file == BRW_GENERAL_REGISTER_FILE || - dest.file == BRW_MESSAGE_REGISTER_FILE) { - assert(dest.writemask != 0); - } - /* From the Ivybridge PRM, Vol 4, Part 3, Section 5.2.4.1: - * Although Dst.HorzStride is a don't care for Align16, HW needs - * this to be programmed as "01". 
- */ - brw_inst_set_dst_hstride(devinfo, inst, 1); - } + } else if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDS || + brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDSC) { + assert(devinfo->gen < 12); + assert(dest.file == BRW_GENERAL_REGISTER_FILE || + dest.file == BRW_ARCHITECTURE_REGISTER_FILE); + assert(dest.address_mode == BRW_ADDRESS_DIRECT); + assert(dest.subnr % 16 == 0); + assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1 && + dest.vstride == dest.width + 1); + assert(!dest.negate && !dest.abs); + brw_inst_set_dst_da_reg_nr(devinfo, inst, dest.nr); + brw_inst_set_dst_da16_subreg_nr(devinfo, inst, dest.subnr / 16); + brw_inst_set_send_dst_reg_file(devinfo, inst, dest.file); } else { - brw_inst_set_dst_ia_subreg_nr(devinfo, inst, dest.subnr); + brw_inst_set_dst_file_type(devinfo, inst, dest.file, dest.type); + brw_inst_set_dst_address_mode(devinfo, inst, dest.address_mode); - /* These are different sizes in align1 vs align16: - */ - if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { - brw_inst_set_dst_ia1_addr_imm(devinfo, inst, - dest.indirect_offset); - if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) - dest.hstride = BRW_HORIZONTAL_STRIDE_1; - brw_inst_set_dst_hstride(devinfo, inst, dest.hstride); + if (dest.address_mode == BRW_ADDRESS_DIRECT) { + brw_inst_set_dst_da_reg_nr(devinfo, inst, dest.nr); + + if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { + brw_inst_set_dst_da1_subreg_nr(devinfo, inst, dest.subnr); + if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) + dest.hstride = BRW_HORIZONTAL_STRIDE_1; + brw_inst_set_dst_hstride(devinfo, inst, dest.hstride); + } else { + brw_inst_set_dst_da16_subreg_nr(devinfo, inst, dest.subnr / 16); + brw_inst_set_da16_writemask(devinfo, inst, dest.writemask); + if (dest.file == BRW_GENERAL_REGISTER_FILE || + dest.file == BRW_MESSAGE_REGISTER_FILE) { + assert(dest.writemask != 0); + } + /* From the Ivybridge PRM, Vol 4, Part 3, Section 5.2.4.1: + * Although Dst.HorzStride is a don't care for Align16, HW needs 
+ * this to be programmed as "01". + */ + brw_inst_set_dst_hstride(devinfo, inst, 1); + } } else { - brw_inst_set_dst_ia16_addr_imm(devinfo, inst, - dest.indirect_offset); - /* even ignored in da16, still need to set as '01' */ - brw_inst_set_dst_hstride(devinfo, inst, 1); + brw_inst_set_dst_ia_subreg_nr(devinfo, inst, dest.subnr); + + /* These are different sizes in align1 vs align16: + */ + if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { + brw_inst_set_dst_ia1_addr_imm(devinfo, inst, + dest.indirect_offset); + if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) + dest.hstride = BRW_HORIZONTAL_STRIDE_1; + brw_inst_set_dst_hstride(devinfo, inst, dest.hstride); + } else { + brw_inst_set_dst_ia16_addr_imm(devinfo, inst, + dest.indirect_offset); + /* even ignored in da16, still need to set as '01' */ + brw_inst_set_dst_hstride(devinfo, inst, 1); + } } } /* Generators should set a default exec_size of either 8 (SIMD4x2 or SIMD8) * or 16 (SIMD16), as that's normally correct. However, when dealing with - * small registers, we automatically reduce it to match the register size. - * - * In platforms that support fp64 we can emit instructions with a width of - * 4 that need two SIMD8 registers and an exec_size of 8 or 16. In these - * cases we need to make sure that these instructions have their exec sizes - * set properly when they are emitted and we can't rely on this code to fix - * it. + * small registers, it can be useful for us to automatically reduce it to + * match the register size. */ - bool fix_exec_size; - if (devinfo->gen >= 6) - fix_exec_size = dest.width < BRW_EXECUTE_4; - else - fix_exec_size = dest.width < BRW_EXECUTE_8; + if (p->automatic_exec_sizes) { + /* + * In platforms that support fp64 we can emit instructions with a width + * of 4 that need two SIMD8 registers and an exec_size of 8 or 16. 
In + * these cases we need to make sure that these instructions have their + * exec sizes set properly when they are emitted and we can't rely on + * this code to fix it. + */ + bool fix_exec_size; + if (devinfo->gen >= 6) + fix_exec_size = dest.width < BRW_EXECUTE_4; + else + fix_exec_size = dest.width < BRW_EXECUTE_8; - if (fix_exec_size) - brw_inst_set_exec_size(devinfo, inst, dest.width); + if (fix_exec_size) + brw_inst_set_exec_size(devinfo, inst, dest.width); + } } void @@ -166,13 +212,16 @@ brw_set_src0(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg) if (reg.file == BRW_MESSAGE_REGISTER_FILE) assert((reg.nr & ~BRW_MRF_COMPR4) < BRW_MAX_MRF(devinfo->gen)); - else if (reg.file != BRW_ARCHITECTURE_REGISTER_FILE) + else if (reg.file == BRW_GENERAL_REGISTER_FILE) assert(reg.nr < 128); gen7_convert_mrf_to_grf(p, &reg); - if (devinfo->gen >= 6 && (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND || - brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDC)) { + if (devinfo->gen >= 6 && + (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND || + brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDC || + brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDS || + brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDSC)) { /* Any source modifiers or regions will be ignored, since this just * identifies the MRF/GRF to start reading the message contents from. * Check for some likely failures. 
@@ -182,84 +231,110 @@ brw_set_src0(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg) assert(reg.address_mode == BRW_ADDRESS_DIRECT); } - brw_inst_set_src0_file_type(devinfo, inst, reg.file, reg.type); - brw_inst_set_src0_abs(devinfo, inst, reg.abs); - brw_inst_set_src0_negate(devinfo, inst, reg.negate); - brw_inst_set_src0_address_mode(devinfo, inst, reg.address_mode); - - if (reg.file == BRW_IMMEDIATE_VALUE) { - if (reg.type == BRW_REGISTER_TYPE_DF || - brw_inst_opcode(devinfo, inst) == BRW_OPCODE_DIM) - brw_inst_set_imm_df(devinfo, inst, reg.df); - else if (reg.type == BRW_REGISTER_TYPE_UQ || - reg.type == BRW_REGISTER_TYPE_Q) - brw_inst_set_imm_uq(devinfo, inst, reg.u64); - else - brw_inst_set_imm_ud(devinfo, inst, reg.ud); - - if (type_sz(reg.type) < 8) { - brw_inst_set_src1_reg_file(devinfo, inst, - BRW_ARCHITECTURE_REGISTER_FILE); - brw_inst_set_src1_reg_hw_type(devinfo, inst, - brw_inst_src0_reg_hw_type(devinfo, inst)); - } + if (devinfo->gen >= 12 && + (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND || + brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDC)) { + assert(reg.file != BRW_IMMEDIATE_VALUE); + assert(reg.address_mode == BRW_ADDRESS_DIRECT); + assert(reg.subnr == 0); + assert(has_scalar_region(reg) || + (reg.hstride == BRW_HORIZONTAL_STRIDE_1 && + reg.vstride == reg.width + 1)); + assert(!reg.negate && !reg.abs); + brw_inst_set_send_src0_reg_file(devinfo, inst, reg.file); + brw_inst_set_src0_da_reg_nr(devinfo, inst, reg.nr); + + } else if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDS || + brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDSC) { + assert(reg.file == BRW_GENERAL_REGISTER_FILE); + assert(reg.address_mode == BRW_ADDRESS_DIRECT); + assert(reg.subnr % 16 == 0); + assert(has_scalar_region(reg) || + (reg.hstride == BRW_HORIZONTAL_STRIDE_1 && + reg.vstride == reg.width + 1)); + assert(!reg.negate && !reg.abs); + brw_inst_set_src0_da_reg_nr(devinfo, inst, reg.nr); + brw_inst_set_src0_da16_subreg_nr(devinfo, inst, reg.subnr / 
16); } else { - if (reg.address_mode == BRW_ADDRESS_DIRECT) { - brw_inst_set_src0_da_reg_nr(devinfo, inst, reg.nr); - if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { - brw_inst_set_src0_da1_subreg_nr(devinfo, inst, reg.subnr); - } else { - brw_inst_set_src0_da16_subreg_nr(devinfo, inst, reg.subnr / 16); - } + brw_inst_set_src0_file_type(devinfo, inst, reg.file, reg.type); + brw_inst_set_src0_abs(devinfo, inst, reg.abs); + brw_inst_set_src0_negate(devinfo, inst, reg.negate); + brw_inst_set_src0_address_mode(devinfo, inst, reg.address_mode); + + if (reg.file == BRW_IMMEDIATE_VALUE) { + if (reg.type == BRW_REGISTER_TYPE_DF || + brw_inst_opcode(devinfo, inst) == BRW_OPCODE_DIM) + brw_inst_set_imm_df(devinfo, inst, reg.df); + else if (reg.type == BRW_REGISTER_TYPE_UQ || + reg.type == BRW_REGISTER_TYPE_Q) + brw_inst_set_imm_uq(devinfo, inst, reg.u64); + else + brw_inst_set_imm_ud(devinfo, inst, reg.ud); + + if (devinfo->gen < 12 && type_sz(reg.type) < 8) { + brw_inst_set_src1_reg_file(devinfo, inst, + BRW_ARCHITECTURE_REGISTER_FILE); + brw_inst_set_src1_reg_hw_type(devinfo, inst, + brw_inst_src0_reg_hw_type(devinfo, inst)); + } } else { - brw_inst_set_src0_ia_subreg_nr(devinfo, inst, reg.subnr); + if (reg.address_mode == BRW_ADDRESS_DIRECT) { + brw_inst_set_src0_da_reg_nr(devinfo, inst, reg.nr); + if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { + brw_inst_set_src0_da1_subreg_nr(devinfo, inst, reg.subnr); + } else { + brw_inst_set_src0_da16_subreg_nr(devinfo, inst, reg.subnr / 16); + } + } else { + brw_inst_set_src0_ia_subreg_nr(devinfo, inst, reg.subnr); - if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { - brw_inst_set_src0_ia1_addr_imm(devinfo, inst, reg.indirect_offset); - } else { - brw_inst_set_src0_ia16_addr_imm(devinfo, inst, reg.indirect_offset); - } - } + if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { + brw_inst_set_src0_ia1_addr_imm(devinfo, inst, reg.indirect_offset); + } else { + brw_inst_set_src0_ia16_addr_imm(devinfo, 
inst, reg.indirect_offset); + } + } - if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { - if (reg.width == BRW_WIDTH_1 && - brw_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1) { - brw_inst_set_src0_hstride(devinfo, inst, BRW_HORIZONTAL_STRIDE_0); - brw_inst_set_src0_width(devinfo, inst, BRW_WIDTH_1); - brw_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_0); - } else { - brw_inst_set_src0_hstride(devinfo, inst, reg.hstride); - brw_inst_set_src0_width(devinfo, inst, reg.width); - brw_inst_set_src0_vstride(devinfo, inst, reg.vstride); - } - } else { - brw_inst_set_src0_da16_swiz_x(devinfo, inst, - BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_X)); - brw_inst_set_src0_da16_swiz_y(devinfo, inst, - BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Y)); - brw_inst_set_src0_da16_swiz_z(devinfo, inst, - BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Z)); - brw_inst_set_src0_da16_swiz_w(devinfo, inst, - BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_W)); - - if (reg.vstride == BRW_VERTICAL_STRIDE_8) { - /* This is an oddity of the fact we're using the same - * descriptions for registers in align_16 as align_1: - */ - brw_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4); - } else if (devinfo->gen == 7 && !devinfo->is_haswell && - reg.type == BRW_REGISTER_TYPE_DF && - reg.vstride == BRW_VERTICAL_STRIDE_2) { - /* From SNB PRM: - * - * "For Align16 access mode, only encodings of 0000 and 0011 - * are allowed. Other codes are reserved." - * - * Presumably the DevSNB behavior applies to IVB as well. 
- */ - brw_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4); + if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { + if (reg.width == BRW_WIDTH_1 && + brw_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1) { + brw_inst_set_src0_hstride(devinfo, inst, BRW_HORIZONTAL_STRIDE_0); + brw_inst_set_src0_width(devinfo, inst, BRW_WIDTH_1); + brw_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_0); + } else { + brw_inst_set_src0_hstride(devinfo, inst, reg.hstride); + brw_inst_set_src0_width(devinfo, inst, reg.width); + brw_inst_set_src0_vstride(devinfo, inst, reg.vstride); + } } else { - brw_inst_set_src0_vstride(devinfo, inst, reg.vstride); + brw_inst_set_src0_da16_swiz_x(devinfo, inst, + BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_X)); + brw_inst_set_src0_da16_swiz_y(devinfo, inst, + BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Y)); + brw_inst_set_src0_da16_swiz_z(devinfo, inst, + BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Z)); + brw_inst_set_src0_da16_swiz_w(devinfo, inst, + BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_W)); + + if (reg.vstride == BRW_VERTICAL_STRIDE_8) { + /* This is an oddity of the fact we're using the same + * descriptions for registers in align_16 as align_1: + */ + brw_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4); + } else if (devinfo->gen == 7 && !devinfo->is_haswell && + reg.type == BRW_REGISTER_TYPE_DF && + reg.vstride == BRW_VERTICAL_STRIDE_2) { + /* From SNB PRM: + * + * "For Align16 access mode, only encodings of 0000 and 0011 + * are allowed. Other codes are reserved." + * + * Presumably the DevSNB behavior applies to IVB as well. 
+ */ + brw_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4); + } else { + brw_inst_set_src0_vstride(devinfo, inst, reg.vstride); + } } } } @@ -271,130 +346,124 @@ brw_set_src1(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg) { const struct gen_device_info *devinfo = p->devinfo; - if (reg.file != BRW_ARCHITECTURE_REGISTER_FILE) + if (reg.file == BRW_GENERAL_REGISTER_FILE) assert(reg.nr < 128); - /* From the IVB PRM Vol. 4, Pt. 3, Section 3.3.3.5: - * - * "Accumulator registers may be accessed explicitly as src0 - * operands only." - */ - assert(reg.file != BRW_ARCHITECTURE_REGISTER_FILE || - reg.nr != BRW_ARF_ACCUMULATOR); - - gen7_convert_mrf_to_grf(p, &reg); - assert(reg.file != BRW_MESSAGE_REGISTER_FILE); + if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDS || + brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDSC || + (devinfo->gen >= 12 && + (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND || + brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDC))) { + assert(reg.file == BRW_GENERAL_REGISTER_FILE || + reg.file == BRW_ARCHITECTURE_REGISTER_FILE); + assert(reg.address_mode == BRW_ADDRESS_DIRECT); + assert(reg.subnr == 0); + assert(has_scalar_region(reg) || + (reg.hstride == BRW_HORIZONTAL_STRIDE_1 && + reg.vstride == reg.width + 1)); + assert(!reg.negate && !reg.abs); + brw_inst_set_send_src1_reg_nr(devinfo, inst, reg.nr); + brw_inst_set_send_src1_reg_file(devinfo, inst, reg.file); + } else { + /* From the IVB PRM Vol. 4, Pt. 3, Section 3.3.3.5: + * + * "Accumulator registers may be accessed explicitly as src0 + * operands only." + */ + assert(reg.file != BRW_ARCHITECTURE_REGISTER_FILE || + reg.nr != BRW_ARF_ACCUMULATOR); - brw_inst_set_src1_file_type(devinfo, inst, reg.file, reg.type); - brw_inst_set_src1_abs(devinfo, inst, reg.abs); - brw_inst_set_src1_negate(devinfo, inst, reg.negate); + gen7_convert_mrf_to_grf(p, &reg); + assert(reg.file != BRW_MESSAGE_REGISTER_FILE); - /* Only src1 can be immediate in two-argument instructions. 
- */ - assert(brw_inst_src0_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE); + brw_inst_set_src1_file_type(devinfo, inst, reg.file, reg.type); + brw_inst_set_src1_abs(devinfo, inst, reg.abs); + brw_inst_set_src1_negate(devinfo, inst, reg.negate); - if (reg.file == BRW_IMMEDIATE_VALUE) { - /* two-argument instructions can only use 32-bit immediates */ - assert(type_sz(reg.type) < 8); - brw_inst_set_imm_ud(devinfo, inst, reg.ud); - } else { - /* This is a hardware restriction, which may or may not be lifted - * in the future: + /* Only src1 can be immediate in two-argument instructions. */ - assert (reg.address_mode == BRW_ADDRESS_DIRECT); - /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */ + assert(brw_inst_src0_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE); - brw_inst_set_src1_da_reg_nr(devinfo, inst, reg.nr); - if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { - brw_inst_set_src1_da1_subreg_nr(devinfo, inst, reg.subnr); + if (reg.file == BRW_IMMEDIATE_VALUE) { + /* two-argument instructions can only use 32-bit immediates */ + assert(type_sz(reg.type) < 8); + brw_inst_set_imm_ud(devinfo, inst, reg.ud); } else { - brw_inst_set_src1_da16_subreg_nr(devinfo, inst, reg.subnr / 16); - } + /* This is a hardware restriction, which may or may not be lifted + * in the future: + */ + assert (reg.address_mode == BRW_ADDRESS_DIRECT); + /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */ - if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { - if (reg.width == BRW_WIDTH_1 && - brw_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1) { - brw_inst_set_src1_hstride(devinfo, inst, BRW_HORIZONTAL_STRIDE_0); - brw_inst_set_src1_width(devinfo, inst, BRW_WIDTH_1); - brw_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_0); - } else { - brw_inst_set_src1_hstride(devinfo, inst, reg.hstride); - brw_inst_set_src1_width(devinfo, inst, reg.width); - brw_inst_set_src1_vstride(devinfo, inst, reg.vstride); - } - } else { - brw_inst_set_src1_da16_swiz_x(devinfo, inst, - 
BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_X)); - brw_inst_set_src1_da16_swiz_y(devinfo, inst, - BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Y)); - brw_inst_set_src1_da16_swiz_z(devinfo, inst, - BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Z)); - brw_inst_set_src1_da16_swiz_w(devinfo, inst, - BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_W)); - - if (reg.vstride == BRW_VERTICAL_STRIDE_8) { - /* This is an oddity of the fact we're using the same - * descriptions for registers in align_16 as align_1: - */ - brw_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4); - } else if (devinfo->gen == 7 && !devinfo->is_haswell && - reg.type == BRW_REGISTER_TYPE_DF && - reg.vstride == BRW_VERTICAL_STRIDE_2) { - /* From SNB PRM: - * - * "For Align16 access mode, only encodings of 0000 and 0011 - * are allowed. Other codes are reserved." - * - * Presumably the DevSNB behavior applies to IVB as well. - */ - brw_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4); + brw_inst_set_src1_da_reg_nr(devinfo, inst, reg.nr); + if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { + brw_inst_set_src1_da1_subreg_nr(devinfo, inst, reg.subnr); } else { - brw_inst_set_src1_vstride(devinfo, inst, reg.vstride); + brw_inst_set_src1_da16_subreg_nr(devinfo, inst, reg.subnr / 16); + } + + if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { + if (reg.width == BRW_WIDTH_1 && + brw_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1) { + brw_inst_set_src1_hstride(devinfo, inst, BRW_HORIZONTAL_STRIDE_0); + brw_inst_set_src1_width(devinfo, inst, BRW_WIDTH_1); + brw_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_0); + } else { + brw_inst_set_src1_hstride(devinfo, inst, reg.hstride); + brw_inst_set_src1_width(devinfo, inst, reg.width); + brw_inst_set_src1_vstride(devinfo, inst, reg.vstride); + } + } else { + brw_inst_set_src1_da16_swiz_x(devinfo, inst, + BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_X)); + brw_inst_set_src1_da16_swiz_y(devinfo, inst, + BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Y)); + 
brw_inst_set_src1_da16_swiz_z(devinfo, inst, + BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Z)); + brw_inst_set_src1_da16_swiz_w(devinfo, inst, + BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_W)); + + if (reg.vstride == BRW_VERTICAL_STRIDE_8) { + /* This is an oddity of the fact we're using the same + * descriptions for registers in align_16 as align_1: + */ + brw_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4); + } else if (devinfo->gen == 7 && !devinfo->is_haswell && + reg.type == BRW_REGISTER_TYPE_DF && + reg.vstride == BRW_VERTICAL_STRIDE_2) { + /* From SNB PRM: + * + * "For Align16 access mode, only encodings of 0000 and 0011 + * are allowed. Other codes are reserved." + * + * Presumably the DevSNB behavior applies to IVB as well. + */ + brw_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4); + } else { + brw_inst_set_src1_vstride(devinfo, inst, reg.vstride); + } } } } } /** - * Set the Message Descriptor and Extended Message Descriptor fields - * for SEND messages. - * - * \note This zeroes out the Function Control bits, so it must be called - * \b before filling out any message-specific data. Callers can - * choose not to fill in irrelevant bits; they will be zero. + * Specify the descriptor and extended descriptor immediate for a SEND(C) + * message instruction. */ void -brw_set_message_descriptor(struct brw_codegen *p, - brw_inst *inst, - enum brw_message_target sfid, - unsigned msg_length, - unsigned response_length, - bool header_present, - bool end_of_thread) +brw_set_desc_ex(struct brw_codegen *p, brw_inst *inst, + unsigned desc, unsigned ex_desc) { const struct gen_device_info *devinfo = p->devinfo; - - brw_set_src1(p, inst, brw_imm_d(0)); - - /* For indirect sends, `inst` will not be the SEND/SENDC instruction - * itself; instead, it will be a MOV/OR into the address register. 
- * - * In this case, we avoid setting the extended message descriptor bits, - * since they go on the later SEND/SENDC instead and if set here would - * instead clobber the conditionalmod bits. - */ - unsigned opcode = brw_inst_opcode(devinfo, inst); - if (opcode == BRW_OPCODE_SEND || opcode == BRW_OPCODE_SENDC) { - brw_inst_set_sfid(devinfo, inst, sfid); - } - - brw_inst_set_mlen(devinfo, inst, msg_length); - brw_inst_set_rlen(devinfo, inst, response_length); - brw_inst_set_eot(devinfo, inst, end_of_thread); - - if (devinfo->gen >= 5) { - brw_inst_set_header_present(devinfo, inst, header_present); - } + assert(brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND || + brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDC); + if (devinfo->gen < 12) + brw_inst_set_src1_file_type(devinfo, inst, + BRW_IMMEDIATE_VALUE, BRW_REGISTER_TYPE_UD); + brw_inst_set_send_desc(devinfo, inst, desc); + if (devinfo->gen >= 9) + brw_inst_set_send_ex_desc(devinfo, inst, ex_desc); } static void brw_set_math_message( struct brw_codegen *p, @@ -432,9 +501,10 @@ static void brw_set_math_message( struct brw_codegen *p, break; } + brw_set_desc(p, inst, brw_message_desc( + devinfo, msg_length, response_length, false)); - brw_set_message_descriptor(p, inst, BRW_SFID_MATH, - msg_length, response_length, false, false); + brw_inst_set_sfid(devinfo, inst, BRW_SFID_MATH); brw_inst_set_math_msg_function(devinfo, inst, function); brw_inst_set_math_msg_signed_int(devinfo, inst, integer_type); brw_inst_set_math_msg_precision(devinfo, inst, low_precision); @@ -452,8 +522,11 @@ static void brw_set_ff_sync_message(struct brw_codegen *p, { const struct gen_device_info *devinfo = p->devinfo; - brw_set_message_descriptor(p, insn, BRW_SFID_URB, - 1, response_length, true, end_of_thread); + brw_set_desc(p, insn, brw_message_desc( + devinfo, 1, response_length, true)); + + brw_inst_set_sfid(devinfo, insn, BRW_SFID_URB); + brw_inst_set_eot(devinfo, insn, end_of_thread); brw_inst_set_urb_opcode(devinfo, insn, 1); /* 
FF_SYNC */ brw_inst_set_urb_allocate(devinfo, insn, allocate); /* The following fields are not used by FF_SYNC: */ @@ -477,9 +550,11 @@ static void brw_set_urb_message( struct brw_codegen *p, assert(devinfo->gen < 7 || !(flags & BRW_URB_WRITE_ALLOCATE)); assert(devinfo->gen >= 7 || !(flags & BRW_URB_WRITE_PER_SLOT_OFFSET)); - brw_set_message_descriptor(p, insn, BRW_SFID_URB, - msg_length, response_length, true, - flags & BRW_URB_WRITE_EOT); + brw_set_desc(p, insn, brw_message_desc( + devinfo, msg_length, response_length, true)); + + brw_inst_set_sfid(devinfo, insn, BRW_SFID_URB); + brw_inst_set_eot(devinfo, insn, !!(flags & BRW_URB_WRITE_EOT)); if (flags & BRW_URB_WRITE_OWORD) { assert(msg_length == 2); /* header + one OWORD of data */ @@ -504,88 +579,6 @@ static void brw_set_urb_message( struct brw_codegen *p, } } -void -brw_set_dp_write_message(struct brw_codegen *p, - brw_inst *insn, - unsigned binding_table_index, - unsigned msg_control, - unsigned msg_type, - unsigned target_cache, - unsigned msg_length, - bool header_present, - unsigned last_render_target, - unsigned response_length, - unsigned end_of_thread, - unsigned send_commit_msg) -{ - const struct gen_device_info *devinfo = p->devinfo; - const unsigned sfid = (devinfo->gen >= 6 ? 
target_cache : - BRW_SFID_DATAPORT_WRITE); - - brw_set_message_descriptor(p, insn, sfid, msg_length, response_length, - header_present, end_of_thread); - - brw_inst_set_binding_table_index(devinfo, insn, binding_table_index); - brw_inst_set_dp_write_msg_type(devinfo, insn, msg_type); - brw_inst_set_dp_write_msg_control(devinfo, insn, msg_control); - brw_inst_set_rt_last(devinfo, insn, last_render_target); - if (devinfo->gen < 7) { - brw_inst_set_dp_write_commit(devinfo, insn, send_commit_msg); - } -} - -void -brw_set_dp_read_message(struct brw_codegen *p, - brw_inst *insn, - unsigned binding_table_index, - unsigned msg_control, - unsigned msg_type, - unsigned target_cache, - unsigned msg_length, - bool header_present, - unsigned response_length) -{ - const struct gen_device_info *devinfo = p->devinfo; - const unsigned sfid = (devinfo->gen >= 6 ? target_cache : - BRW_SFID_DATAPORT_READ); - - brw_set_message_descriptor(p, insn, sfid, msg_length, response_length, - header_present, false); - - brw_inst_set_binding_table_index(devinfo, insn, binding_table_index); - brw_inst_set_dp_read_msg_type(devinfo, insn, msg_type); - brw_inst_set_dp_read_msg_control(devinfo, insn, msg_control); - if (devinfo->gen < 6) - brw_inst_set_dp_read_target_cache(devinfo, insn, target_cache); -} - -void -brw_set_sampler_message(struct brw_codegen *p, - brw_inst *inst, - unsigned binding_table_index, - unsigned sampler, - unsigned msg_type, - unsigned response_length, - unsigned msg_length, - unsigned header_present, - unsigned simd_mode, - unsigned return_format) -{ - const struct gen_device_info *devinfo = p->devinfo; - - brw_set_message_descriptor(p, inst, BRW_SFID_SAMPLER, msg_length, - response_length, header_present, false); - - brw_inst_set_binding_table_index(devinfo, inst, binding_table_index); - brw_inst_set_sampler(devinfo, inst, sampler); - brw_inst_set_sampler_msg_type(devinfo, inst, msg_type); - if (devinfo->gen >= 5) { - brw_inst_set_sampler_simd_mode(devinfo, inst, simd_mode); 
- } else if (devinfo->gen == 4 && !devinfo->is_g4x) { - brw_inst_set_sampler_return_format(devinfo, inst, return_format); - } -} - static void gen7_set_dp_scratch_message(struct brw_codegen *p, brw_inst *inst, @@ -601,11 +594,13 @@ gen7_set_dp_scratch_message(struct brw_codegen *p, const struct gen_device_info *devinfo = p->devinfo; assert(num_regs == 1 || num_regs == 2 || num_regs == 4 || (devinfo->gen >= 8 && num_regs == 8)); - const unsigned block_size = (devinfo->gen >= 8 ? _mesa_logbase2(num_regs) : + const unsigned block_size = (devinfo->gen >= 8 ? util_logbase2(num_regs) : num_regs - 1); - brw_set_message_descriptor(p, inst, GEN7_SFID_DATAPORT_DATA_CACHE, - mlen, rlen, header_present, false); + brw_set_desc(p, inst, brw_message_desc( + devinfo, mlen, rlen, header_present)); + + brw_inst_set_sfid(devinfo, inst, GEN7_SFID_DATAPORT_DATA_CACHE); brw_inst_set_dp_category(devinfo, inst, 1); /* Scratch Block Read/Write msgs */ brw_inst_set_scratch_read_write(devinfo, inst, write); brw_inst_set_scratch_type(devinfo, inst, dword); @@ -614,6 +609,37 @@ gen7_set_dp_scratch_message(struct brw_codegen *p, brw_inst_set_scratch_addr_offset(devinfo, inst, addr_offset); } +static void +brw_inst_set_state(const struct gen_device_info *devinfo, + brw_inst *insn, + const struct brw_insn_state *state) +{ + brw_inst_set_exec_size(devinfo, insn, state->exec_size); + brw_inst_set_group(devinfo, insn, state->group); + brw_inst_set_compression(devinfo, insn, state->compressed); + brw_inst_set_access_mode(devinfo, insn, state->access_mode); + brw_inst_set_mask_control(devinfo, insn, state->mask_control); + if (devinfo->gen >= 12) + brw_inst_set_swsb(devinfo, insn, tgl_swsb_encode(state->swsb)); + brw_inst_set_saturate(devinfo, insn, state->saturate); + brw_inst_set_pred_control(devinfo, insn, state->predicate); + brw_inst_set_pred_inv(devinfo, insn, state->pred_inv); + + if (is_3src(devinfo, brw_inst_opcode(devinfo, insn)) && + state->access_mode == BRW_ALIGN_16) { + 
brw_inst_set_3src_a16_flag_subreg_nr(devinfo, insn, state->flag_subreg % 2); + if (devinfo->gen >= 7) + brw_inst_set_3src_a16_flag_reg_nr(devinfo, insn, state->flag_subreg / 2); + } else { + brw_inst_set_flag_subreg_nr(devinfo, insn, state->flag_subreg % 2); + if (devinfo->gen >= 7) + brw_inst_set_flag_reg_nr(devinfo, insn, state->flag_subreg / 2); + } + + if (devinfo->gen >= 6) + brw_inst_set_acc_wr_control(devinfo, insn, state->acc_wr_control); +} + #define next_insn brw_next_insn brw_inst * brw_next_insn(struct brw_codegen *p, unsigned opcode) @@ -628,9 +654,13 @@ brw_next_insn(struct brw_codegen *p, unsigned opcode) p->next_insn_offset += 16; insn = &p->store[p->nr_insn++]; - memcpy(insn, p->current, sizeof(*insn)); + memset(insn, 0, sizeof(*insn)); brw_inst_set_opcode(devinfo, insn, opcode); + + /* Apply the default instruction state */ + brw_inst_set_state(devinfo, insn, p->current); + return insn; } @@ -669,6 +699,47 @@ get_3src_subreg_nr(struct brw_reg reg) return reg.subnr / 4; } +static enum gen10_align1_3src_vertical_stride +to_3src_align1_vstride(const struct gen_device_info *devinfo, + enum brw_vertical_stride vstride) +{ + switch (vstride) { + case BRW_VERTICAL_STRIDE_0: + return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_0; + case BRW_VERTICAL_STRIDE_1: + assert(devinfo->gen >= 12); + return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_1; + case BRW_VERTICAL_STRIDE_2: + assert(devinfo->gen < 12); + return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_2; + case BRW_VERTICAL_STRIDE_4: + return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_4; + case BRW_VERTICAL_STRIDE_8: + case BRW_VERTICAL_STRIDE_16: + return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_8; + default: + unreachable("invalid vstride"); + } +} + + +static enum gen10_align1_3src_src_horizontal_stride +to_3src_align1_hstride(enum brw_horizontal_stride hstride) +{ + switch (hstride) { + case BRW_HORIZONTAL_STRIDE_0: + return BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_0; + case BRW_HORIZONTAL_STRIDE_1: + return BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_1; + case 
BRW_HORIZONTAL_STRIDE_2: + return BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_2; + case BRW_HORIZONTAL_STRIDE_4: + return BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_4; + default: + unreachable("invalid hstride"); + } +} + static brw_inst * brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest, struct brw_reg src0, struct brw_reg src1, struct brw_reg src2) @@ -678,83 +749,203 @@ brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest, gen7_convert_mrf_to_grf(p, &dest); - assert(brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16); - - assert(dest.file == BRW_GENERAL_REGISTER_FILE || - dest.file == BRW_MESSAGE_REGISTER_FILE); assert(dest.nr < 128); - assert(dest.address_mode == BRW_ADDRESS_DIRECT); - assert(dest.type == BRW_REGISTER_TYPE_F || - dest.type == BRW_REGISTER_TYPE_DF || - dest.type == BRW_REGISTER_TYPE_D || - dest.type == BRW_REGISTER_TYPE_UD); - if (devinfo->gen == 6) { - brw_inst_set_3src_a16_dst_reg_file(devinfo, inst, - dest.file == BRW_MESSAGE_REGISTER_FILE); - } - brw_inst_set_3src_dst_reg_nr(devinfo, inst, dest.nr); - brw_inst_set_3src_a16_dst_subreg_nr(devinfo, inst, dest.subnr / 16); - brw_inst_set_3src_a16_dst_writemask(devinfo, inst, dest.writemask); - assert(src0.file == BRW_GENERAL_REGISTER_FILE); + if (devinfo->gen >= 10) + assert(!(src0.file == BRW_IMMEDIATE_VALUE && + src2.file == BRW_IMMEDIATE_VALUE)); + + assert(src0.file == BRW_IMMEDIATE_VALUE || src0.nr < 128); + assert(src1.file != BRW_IMMEDIATE_VALUE && src1.nr < 128); + assert(src2.file == BRW_IMMEDIATE_VALUE || src2.nr < 128); + assert(dest.address_mode == BRW_ADDRESS_DIRECT); assert(src0.address_mode == BRW_ADDRESS_DIRECT); - assert(src0.nr < 128); - brw_inst_set_3src_a16_src0_swizzle(devinfo, inst, src0.swizzle); - brw_inst_set_3src_a16_src0_subreg_nr(devinfo, inst, get_3src_subreg_nr(src0)); - brw_inst_set_3src_src0_reg_nr(devinfo, inst, src0.nr); - brw_inst_set_3src_src0_abs(devinfo, inst, src0.abs); - brw_inst_set_3src_src0_negate(devinfo, inst, src0.negate); - 
brw_inst_set_3src_a16_src0_rep_ctrl(devinfo, inst, - src0.vstride == BRW_VERTICAL_STRIDE_0); - - assert(src1.file == BRW_GENERAL_REGISTER_FILE); assert(src1.address_mode == BRW_ADDRESS_DIRECT); - assert(src1.nr < 128); - brw_inst_set_3src_a16_src1_swizzle(devinfo, inst, src1.swizzle); - brw_inst_set_3src_a16_src1_subreg_nr(devinfo, inst, get_3src_subreg_nr(src1)); - brw_inst_set_3src_src1_reg_nr(devinfo, inst, src1.nr); - brw_inst_set_3src_src1_abs(devinfo, inst, src1.abs); - brw_inst_set_3src_src1_negate(devinfo, inst, src1.negate); - brw_inst_set_3src_a16_src1_rep_ctrl(devinfo, inst, - src1.vstride == BRW_VERTICAL_STRIDE_0); - - assert(src2.file == BRW_GENERAL_REGISTER_FILE); assert(src2.address_mode == BRW_ADDRESS_DIRECT); - assert(src2.nr < 128); - brw_inst_set_3src_a16_src2_swizzle(devinfo, inst, src2.swizzle); - brw_inst_set_3src_a16_src2_subreg_nr(devinfo, inst, get_3src_subreg_nr(src2)); - brw_inst_set_3src_src2_reg_nr(devinfo, inst, src2.nr); - brw_inst_set_3src_src2_abs(devinfo, inst, src2.abs); - brw_inst_set_3src_src2_negate(devinfo, inst, src2.negate); - brw_inst_set_3src_a16_src2_rep_ctrl(devinfo, inst, - src2.vstride == BRW_VERTICAL_STRIDE_0); - - if (devinfo->gen >= 7) { - /* Set both the source and destination types based on dest.type, - * ignoring the source register types. The MAD and LRP emitters ensure - * that all four types are float. The BFE and BFI2 emitters, however, - * may send us mixed D and UD types and want us to ignore that and use - * the destination type. 
- */ - switch (dest.type) { - case BRW_REGISTER_TYPE_F: - brw_inst_set_3src_a16_src_hw_type(devinfo, inst, BRW_3SRC_TYPE_F); - brw_inst_set_3src_a16_dst_hw_type(devinfo, inst, BRW_3SRC_TYPE_F); - break; - case BRW_REGISTER_TYPE_DF: - brw_inst_set_3src_a16_src_hw_type(devinfo, inst, BRW_3SRC_TYPE_DF); - brw_inst_set_3src_a16_dst_hw_type(devinfo, inst, BRW_3SRC_TYPE_DF); - break; - case BRW_REGISTER_TYPE_D: - brw_inst_set_3src_a16_src_hw_type(devinfo, inst, BRW_3SRC_TYPE_D); - brw_inst_set_3src_a16_dst_hw_type(devinfo, inst, BRW_3SRC_TYPE_D); - break; - case BRW_REGISTER_TYPE_UD: - brw_inst_set_3src_a16_src_hw_type(devinfo, inst, BRW_3SRC_TYPE_UD); - brw_inst_set_3src_a16_dst_hw_type(devinfo, inst, BRW_3SRC_TYPE_UD); - break; - default: - unreachable("not reached"); + + if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { + assert(dest.file == BRW_GENERAL_REGISTER_FILE || + dest.file == BRW_ARCHITECTURE_REGISTER_FILE); + + if (devinfo->gen >= 12) { + brw_inst_set_3src_a1_dst_reg_file(devinfo, inst, dest.file); + brw_inst_set_3src_dst_reg_nr(devinfo, inst, dest.nr); + } else { + if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE) { + brw_inst_set_3src_a1_dst_reg_file(devinfo, inst, + BRW_ALIGN1_3SRC_ACCUMULATOR); + brw_inst_set_3src_dst_reg_nr(devinfo, inst, BRW_ARF_ACCUMULATOR); + } else { + brw_inst_set_3src_a1_dst_reg_file(devinfo, inst, + BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE); + brw_inst_set_3src_dst_reg_nr(devinfo, inst, dest.nr); + } + } + brw_inst_set_3src_a1_dst_subreg_nr(devinfo, inst, dest.subnr / 8); + + brw_inst_set_3src_a1_dst_hstride(devinfo, inst, BRW_ALIGN1_3SRC_DST_HORIZONTAL_STRIDE_1); + + if (brw_reg_type_is_floating_point(dest.type)) { + brw_inst_set_3src_a1_exec_type(devinfo, inst, + BRW_ALIGN1_3SRC_EXEC_TYPE_FLOAT); + } else { + brw_inst_set_3src_a1_exec_type(devinfo, inst, + BRW_ALIGN1_3SRC_EXEC_TYPE_INT); + } + + brw_inst_set_3src_a1_dst_type(devinfo, inst, dest.type); + brw_inst_set_3src_a1_src0_type(devinfo, inst, src0.type); + 
brw_inst_set_3src_a1_src1_type(devinfo, inst, src1.type); + brw_inst_set_3src_a1_src2_type(devinfo, inst, src2.type); + + if (src0.file == BRW_IMMEDIATE_VALUE) { + brw_inst_set_3src_a1_src0_imm(devinfo, inst, src0.ud); + } else { + brw_inst_set_3src_a1_src0_vstride( + devinfo, inst, to_3src_align1_vstride(devinfo, src0.vstride)); + brw_inst_set_3src_a1_src0_hstride(devinfo, inst, + to_3src_align1_hstride(src0.hstride)); + brw_inst_set_3src_a1_src0_subreg_nr(devinfo, inst, src0.subnr); + if (src0.type == BRW_REGISTER_TYPE_NF) { + brw_inst_set_3src_src0_reg_nr(devinfo, inst, BRW_ARF_ACCUMULATOR); + } else { + brw_inst_set_3src_src0_reg_nr(devinfo, inst, src0.nr); + } + brw_inst_set_3src_src0_abs(devinfo, inst, src0.abs); + brw_inst_set_3src_src0_negate(devinfo, inst, src0.negate); + } + brw_inst_set_3src_a1_src1_vstride( + devinfo, inst, to_3src_align1_vstride(devinfo, src1.vstride)); + brw_inst_set_3src_a1_src1_hstride(devinfo, inst, + to_3src_align1_hstride(src1.hstride)); + + brw_inst_set_3src_a1_src1_subreg_nr(devinfo, inst, src1.subnr); + if (src1.file == BRW_ARCHITECTURE_REGISTER_FILE) { + brw_inst_set_3src_src1_reg_nr(devinfo, inst, BRW_ARF_ACCUMULATOR); + } else { + brw_inst_set_3src_src1_reg_nr(devinfo, inst, src1.nr); + } + brw_inst_set_3src_src1_abs(devinfo, inst, src1.abs); + brw_inst_set_3src_src1_negate(devinfo, inst, src1.negate); + + if (src2.file == BRW_IMMEDIATE_VALUE) { + brw_inst_set_3src_a1_src2_imm(devinfo, inst, src2.ud); + } else { + brw_inst_set_3src_a1_src2_hstride(devinfo, inst, + to_3src_align1_hstride(src2.hstride)); + /* no vstride on src2 */ + brw_inst_set_3src_a1_src2_subreg_nr(devinfo, inst, src2.subnr); + brw_inst_set_3src_src2_reg_nr(devinfo, inst, src2.nr); + brw_inst_set_3src_src2_abs(devinfo, inst, src2.abs); + brw_inst_set_3src_src2_negate(devinfo, inst, src2.negate); + } + + assert(src0.file == BRW_GENERAL_REGISTER_FILE || + src0.file == BRW_IMMEDIATE_VALUE || + (src0.file == BRW_ARCHITECTURE_REGISTER_FILE && + src0.type == 
BRW_REGISTER_TYPE_NF)); + assert(src1.file == BRW_GENERAL_REGISTER_FILE || + src1.file == BRW_ARCHITECTURE_REGISTER_FILE); + assert(src2.file == BRW_GENERAL_REGISTER_FILE || + src2.file == BRW_IMMEDIATE_VALUE); + + if (devinfo->gen >= 12) { + if (src0.file == BRW_IMMEDIATE_VALUE) { + brw_inst_set_3src_a1_src0_is_imm(devinfo, inst, 1); + } else { + brw_inst_set_3src_a1_src0_reg_file(devinfo, inst, src0.file); + } + + brw_inst_set_3src_a1_src1_reg_file(devinfo, inst, src1.file); + + if (src2.file == BRW_IMMEDIATE_VALUE) { + brw_inst_set_3src_a1_src2_is_imm(devinfo, inst, 1); + } else { + brw_inst_set_3src_a1_src2_reg_file(devinfo, inst, src2.file); + } + } else { + brw_inst_set_3src_a1_src0_reg_file(devinfo, inst, + src0.file == BRW_GENERAL_REGISTER_FILE ? + BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE : + BRW_ALIGN1_3SRC_IMMEDIATE_VALUE); + brw_inst_set_3src_a1_src1_reg_file(devinfo, inst, + src1.file == BRW_GENERAL_REGISTER_FILE ? + BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE : + BRW_ALIGN1_3SRC_ACCUMULATOR); + brw_inst_set_3src_a1_src2_reg_file(devinfo, inst, + src2.file == BRW_GENERAL_REGISTER_FILE ? 
+ BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE : + BRW_ALIGN1_3SRC_IMMEDIATE_VALUE); + } + + } else { + assert(dest.file == BRW_GENERAL_REGISTER_FILE || + dest.file == BRW_MESSAGE_REGISTER_FILE); + assert(dest.type == BRW_REGISTER_TYPE_F || + dest.type == BRW_REGISTER_TYPE_DF || + dest.type == BRW_REGISTER_TYPE_D || + dest.type == BRW_REGISTER_TYPE_UD || + (dest.type == BRW_REGISTER_TYPE_HF && devinfo->gen >= 8)); + if (devinfo->gen == 6) { + brw_inst_set_3src_a16_dst_reg_file(devinfo, inst, + dest.file == BRW_MESSAGE_REGISTER_FILE); + } + brw_inst_set_3src_dst_reg_nr(devinfo, inst, dest.nr); + brw_inst_set_3src_a16_dst_subreg_nr(devinfo, inst, dest.subnr / 4); + brw_inst_set_3src_a16_dst_writemask(devinfo, inst, dest.writemask); + + assert(src0.file == BRW_GENERAL_REGISTER_FILE); + brw_inst_set_3src_a16_src0_swizzle(devinfo, inst, src0.swizzle); + brw_inst_set_3src_a16_src0_subreg_nr(devinfo, inst, get_3src_subreg_nr(src0)); + brw_inst_set_3src_src0_reg_nr(devinfo, inst, src0.nr); + brw_inst_set_3src_src0_abs(devinfo, inst, src0.abs); + brw_inst_set_3src_src0_negate(devinfo, inst, src0.negate); + brw_inst_set_3src_a16_src0_rep_ctrl(devinfo, inst, + src0.vstride == BRW_VERTICAL_STRIDE_0); + + assert(src1.file == BRW_GENERAL_REGISTER_FILE); + brw_inst_set_3src_a16_src1_swizzle(devinfo, inst, src1.swizzle); + brw_inst_set_3src_a16_src1_subreg_nr(devinfo, inst, get_3src_subreg_nr(src1)); + brw_inst_set_3src_src1_reg_nr(devinfo, inst, src1.nr); + brw_inst_set_3src_src1_abs(devinfo, inst, src1.abs); + brw_inst_set_3src_src1_negate(devinfo, inst, src1.negate); + brw_inst_set_3src_a16_src1_rep_ctrl(devinfo, inst, + src1.vstride == BRW_VERTICAL_STRIDE_0); + + assert(src2.file == BRW_GENERAL_REGISTER_FILE); + brw_inst_set_3src_a16_src2_swizzle(devinfo, inst, src2.swizzle); + brw_inst_set_3src_a16_src2_subreg_nr(devinfo, inst, get_3src_subreg_nr(src2)); + brw_inst_set_3src_src2_reg_nr(devinfo, inst, src2.nr); + brw_inst_set_3src_src2_abs(devinfo, inst, src2.abs); + 
brw_inst_set_3src_src2_negate(devinfo, inst, src2.negate); + brw_inst_set_3src_a16_src2_rep_ctrl(devinfo, inst, + src2.vstride == BRW_VERTICAL_STRIDE_0); + + if (devinfo->gen >= 7) { + /* Set both the source and destination types based on dest.type, + * ignoring the source register types. The MAD and LRP emitters ensure + * that all four types are float. The BFE and BFI2 emitters, however, + * may send us mixed D and UD types and want us to ignore that and use + * the destination type. + */ + brw_inst_set_3src_a16_src_type(devinfo, inst, dest.type); + brw_inst_set_3src_a16_dst_type(devinfo, inst, dest.type); + + /* From the Bspec, 3D Media GPGPU, Instruction fields, srcType: + * + * "Three source instructions can use operands with mixed-mode + * precision. When SrcType field is set to :f or :hf it defines + * precision for source 0 only, and fields Src1Type and Src2Type + * define precision for other source operands: + * + * 0b = :f. Single precision Float (32-bit). + * 1b = :hf. Half precision Float (16-bit)." 
+ */ + if (src1.type == BRW_REGISTER_TYPE_HF) + brw_inst_set_3src_a16_src1_type(devinfo, inst, 1); + + if (src2.type == BRW_REGISTER_TYPE_HF) + brw_inst_set_3src_a16_src2_type(devinfo, inst, 1); } } @@ -788,7 +979,15 @@ brw_inst *brw_##OP(struct brw_codegen *p, \ struct brw_reg src0, \ struct brw_reg src1, \ struct brw_reg src2) \ -{ \ +{ \ + if (p->current->access_mode == BRW_ALIGN_16) { \ + if (src0.vstride == BRW_VERTICAL_STRIDE_0) \ + src0.swizzle = BRW_SWIZZLE_XXXX; \ + if (src1.vstride == BRW_VERTICAL_STRIDE_0) \ + src1.swizzle = BRW_SWIZZLE_XXXX; \ + if (src2.vstride == BRW_VERTICAL_STRIDE_0) \ + src2.swizzle = BRW_SWIZZLE_XXXX; \ + } \ return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2); \ } @@ -809,37 +1008,19 @@ brw_inst *brw_##OP(struct brw_codegen *p, \ assert(src0.type == BRW_REGISTER_TYPE_DF); \ assert(src1.type == BRW_REGISTER_TYPE_DF); \ assert(src2.type == BRW_REGISTER_TYPE_DF); \ + } \ + \ + if (p->current->access_mode == BRW_ALIGN_16) { \ + if (src0.vstride == BRW_VERTICAL_STRIDE_0) \ + src0.swizzle = BRW_SWIZZLE_XXXX; \ + if (src1.vstride == BRW_VERTICAL_STRIDE_0) \ + src1.swizzle = BRW_SWIZZLE_XXXX; \ + if (src2.vstride == BRW_VERTICAL_STRIDE_0) \ + src2.swizzle = BRW_SWIZZLE_XXXX; \ } \ return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2); \ } -/* Rounding operations (other than RNDD) require two instructions - the first - * stores a rounded value (possibly the wrong way) in the dest register, but - * also sets a per-channel "increment bit" in the flag register. A predicated - * add of 1.0 fixes dest to contain the desired result. - * - * Sandybridge and later appear to round correctly without an ADD. 
- */ -#define ROUND(OP) \ -void brw_##OP(struct brw_codegen *p, \ - struct brw_reg dest, \ - struct brw_reg src) \ -{ \ - const struct gen_device_info *devinfo = p->devinfo; \ - brw_inst *rnd, *add; \ - rnd = next_insn(p, BRW_OPCODE_##OP); \ - brw_set_dest(p, rnd, dest); \ - brw_set_src0(p, rnd, src); \ - \ - if (devinfo->gen < 6) { \ - /* turn on round-increments */ \ - brw_inst_set_cond_modifier(devinfo, rnd, BRW_CONDITIONAL_R); \ - add = brw_ADD(p, dest, dest, brw_imm_f(1.0f)); \ - brw_inst_set_pred_control(devinfo, add, BRW_PREDICATE_NORMAL); \ - } \ -} - - ALU2(SEL) ALU1(NOT) ALU2(AND) @@ -849,8 +1030,14 @@ ALU2(SHR) ALU2(SHL) ALU1(DIM) ALU2(ASR) +ALU2(ROL) +ALU2(ROR) +ALU3(CSEL) ALU1(FRC) ALU1(RNDD) +ALU1(RNDE) +ALU1(RNDU) +ALU1(RNDZ) ALU2(MAC) ALU2(MACH) ALU1(LZD) @@ -858,7 +1045,7 @@ ALU2(DP4) ALU2(DPH) ALU2(DP3) ALU2(DP2) -ALU3F(MAD) +ALU3(MAD) ALU3F(LRP) ALU1(BFREV) ALU3(BFE) @@ -870,30 +1057,24 @@ ALU1(CBIT) ALU2(ADDC) ALU2(SUBB) -ROUND(RNDZ) -ROUND(RNDE) - brw_inst * brw_MOV(struct brw_codegen *p, struct brw_reg dest, struct brw_reg src0) { const struct gen_device_info *devinfo = p->devinfo; /* When converting F->DF on IVB/BYT, every odd source channel is ignored. - * To avoid the problems that causes, we use a <1,2,0> source region to read - * each element twice. + * To avoid the problems that causes, we use an <X,2,0> source region to + * read each element twice.
*/ if (devinfo->gen == 7 && !devinfo->is_haswell && - brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1 && + brw_get_default_access_mode(p) == BRW_ALIGN_1 && dest.type == BRW_REGISTER_TYPE_DF && (src0.type == BRW_REGISTER_TYPE_F || src0.type == BRW_REGISTER_TYPE_D || src0.type == BRW_REGISTER_TYPE_UD) && !has_scalar_region(src0)) { - assert(src0.vstride == BRW_VERTICAL_STRIDE_4 && - src0.width == BRW_WIDTH_4 && - src0.hstride == BRW_HORIZONTAL_STRIDE_1); - - src0.vstride = BRW_VERTICAL_STRIDE_1; + assert(src0.vstride == src0.width + src0.hstride); + src0.vstride = src0.hstride; src0.width = BRW_WIDTH_2; src0.hstride = BRW_HORIZONTAL_STRIDE_0; } @@ -1005,7 +1186,7 @@ brw_inst * brw_F32TO16(struct brw_codegen *p, struct brw_reg dst, struct brw_reg src) { const struct gen_device_info *devinfo = p->devinfo; - const bool align16 = brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_16; + const bool align16 = brw_get_default_access_mode(p) == BRW_ALIGN_16; /* The F32TO16 instruction doesn't support 32-bit destination types in * Align1 mode, and neither does the Gen8 implementation in terms of a * converting MOV. 
Gen7 does zero out the high 16 bits in Align16 mode as @@ -1039,9 +1220,12 @@ brw_F32TO16(struct brw_codegen *p, struct brw_reg dst, struct brw_reg src) } if (needs_zero_fill) { - brw_inst_set_no_dd_clear(devinfo, inst, true); + if (devinfo->gen < 12) + brw_inst_set_no_dd_clear(devinfo, inst, true); + brw_set_default_swsb(p, tgl_swsb_null()); inst = brw_MOV(p, suboffset(dst, 1), brw_imm_w(0)); - brw_inst_set_no_dd_check(devinfo, inst, true); + if (devinfo->gen < 12) + brw_inst_set_no_dd_check(devinfo, inst, true); } brw_pop_insn_state(p); @@ -1052,7 +1236,7 @@ brw_inst * brw_F16TO32(struct brw_codegen *p, struct brw_reg dst, struct brw_reg src) { const struct gen_device_info *devinfo = p->devinfo; - bool align16 = brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_16; + bool align16 = brw_get_default_access_mode(p) == BRW_ALIGN_16; if (align16) { assert(src.type == BRW_REGISTER_TYPE_UD); @@ -1087,9 +1271,11 @@ void brw_NOP(struct brw_codegen *p) brw_inst_set_opcode(p->devinfo, insn, BRW_OPCODE_NOP); } - - - +void brw_SYNC(struct brw_codegen *p, enum tgl_sync_function func) +{ + brw_inst *insn = next_insn(p, BRW_OPCODE_SYNC); + brw_inst_set_cond_modifier(p->devinfo, insn, func); +} /*********************************************************************** * Comparisons, if/else/endif @@ -1103,7 +1289,7 @@ brw_JMPI(struct brw_codegen *p, struct brw_reg index, struct brw_reg ip = brw_ip_reg(); brw_inst *inst = brw_alu2(p, BRW_OPCODE_JMPI, ip, ip, index); - brw_inst_set_exec_size(devinfo, inst, BRW_EXECUTE_2); + brw_inst_set_exec_size(devinfo, inst, BRW_EXECUTE_1); brw_inst_set_qtr_control(devinfo, inst, BRW_COMPRESSION_NONE); brw_inst_set_mask_control(devinfo, inst, BRW_MASK_DISABLE); brw_inst_set_pred_control(devinfo, inst, predicate_control); @@ -1193,7 +1379,8 @@ brw_IF(struct brw_codegen *p, unsigned execute_size) brw_inst_set_uip(devinfo, insn, 0); } else { brw_set_dest(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D))); - brw_set_src0(p, insn, 
brw_imm_d(0)); + if (devinfo->gen < 12) + brw_set_src0(p, insn, brw_imm_d(0)); brw_inst_set_jip(devinfo, insn, 0); brw_inst_set_uip(devinfo, insn, 0); } @@ -1223,8 +1410,7 @@ gen6_IF(struct brw_codegen *p, enum brw_conditional_mod conditional, insn = next_insn(p, BRW_OPCODE_IF); brw_set_dest(p, insn, brw_imm_w(0)); - brw_inst_set_exec_size(devinfo, insn, - brw_inst_exec_size(devinfo, p->current)); + brw_inst_set_exec_size(devinfo, insn, brw_get_default_exec_size(p)); brw_inst_set_gen6_jump_count(devinfo, insn, 0); brw_set_src0(p, insn, src0); brw_set_src1(p, insn, src1); @@ -1394,7 +1580,8 @@ brw_ELSE(struct brw_codegen *p) brw_inst_set_uip(devinfo, insn, 0); } else { brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); - brw_set_src0(p, insn, brw_imm_d(0)); + if (devinfo->gen < 12) + brw_set_src0(p, insn, brw_imm_d(0)); brw_inst_set_jip(devinfo, insn, 0); brw_inst_set_uip(devinfo, insn, 0); } @@ -1510,8 +1697,7 @@ brw_BREAK(struct brw_codegen *p) p->if_depth_in_loop[p->loop_stack_depth]); } brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE); - brw_inst_set_exec_size(devinfo, insn, - brw_inst_exec_size(devinfo, p->current)); + brw_inst_set_exec_size(devinfo, insn, brw_get_default_exec_size(p)); return insn; } @@ -1536,29 +1722,36 @@ brw_CONT(struct brw_codegen *p) p->if_depth_in_loop[p->loop_stack_depth]); } brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE); - brw_inst_set_exec_size(devinfo, insn, - brw_inst_exec_size(devinfo, p->current)); + brw_inst_set_exec_size(devinfo, insn, brw_get_default_exec_size(p)); return insn; } brw_inst * -gen6_HALT(struct brw_codegen *p) +brw_HALT(struct brw_codegen *p) { const struct gen_device_info *devinfo = p->devinfo; brw_inst *insn; insn = next_insn(p, BRW_OPCODE_HALT); brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); - if (devinfo->gen >= 8) { - brw_set_src0(p, insn, brw_imm_d(0x0)); - } else { + if (devinfo->gen < 6) { + /* From the Gen4 PRM: + * + * "IP register must 
be put (for example, by the assembler) at <dst> + * and <src0> locations. + */ + brw_set_dest(p, insn, brw_ip_reg()); + brw_set_src0(p, insn, brw_ip_reg()); + brw_set_src1(p, insn, brw_imm_d(0x0)); /* exitcode updated later. */ + } else if (devinfo->gen < 8) { brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); brw_set_src1(p, insn, brw_imm_d(0x0)); /* UIP and JIP, updated later. */ + } else if (devinfo->gen < 12) { + brw_set_src0(p, insn, brw_imm_d(0x0)); } brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE); - brw_inst_set_exec_size(devinfo, insn, - brw_inst_exec_size(devinfo, p->current)); + brw_inst_set_exec_size(devinfo, insn, brw_get_default_exec_size(p)); return insn; } @@ -1650,7 +1843,8 @@ brw_WHILE(struct brw_codegen *p) if (devinfo->gen >= 8) { brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); - brw_set_src0(p, insn, brw_imm_d(0)); + if (devinfo->gen < 12) + brw_set_src0(p, insn, brw_imm_d(0)); brw_inst_set_jip(devinfo, insn, br * (do_insn - insn)); } else if (devinfo->gen == 7) { brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); @@ -1664,8 +1858,7 @@ brw_WHILE(struct brw_codegen *p) brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); } - brw_inst_set_exec_size(devinfo, insn, - brw_inst_exec_size(devinfo, p->current)); + brw_inst_set_exec_size(devinfo, insn, brw_get_default_exec_size(p)); } else { if (p->single_program_flow) { @@ -1819,8 +2012,10 @@ void gen6_math(struct brw_codegen *p, assert(src1.file == BRW_GENERAL_REGISTER_FILE || (devinfo->gen >= 8 && src1.file == BRW_IMMEDIATE_VALUE)); } else { - assert(src0.type == BRW_REGISTER_TYPE_F); - assert(src1.type == BRW_REGISTER_TYPE_F); + assert(src0.type == BRW_REGISTER_TYPE_F || + (src0.type == BRW_REGISTER_TYPE_HF && devinfo->gen >= 9)); + assert(src1.type == BRW_REGISTER_TYPE_F || + (src1.type == BRW_REGISTER_TYPE_HF && devinfo->gen >= 9)); } /* Source modifiers are ignored for extended math instructions on Gen6.
*/ @@ -1868,7 +2063,8 @@ void brw_oword_block_write_scratch(struct brw_codegen *p, const unsigned target_cache = (devinfo->gen >= 7 ? GEN7_SFID_DATAPORT_DATA_CACHE : devinfo->gen >= 6 ? GEN6_SFID_DATAPORT_RENDER_CACHE : - BRW_DATAPORT_READ_TARGET_RENDER_CACHE); + BRW_SFID_DATAPORT_WRITE); + const struct tgl_swsb swsb = brw_get_default_swsb(p); uint32_t msg_type; if (devinfo->gen >= 6) @@ -1888,10 +2084,13 @@ void brw_oword_block_write_scratch(struct brw_codegen *p, brw_set_default_exec_size(p, BRW_EXECUTE_8); brw_set_default_mask_control(p, BRW_MASK_DISABLE); brw_set_default_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_default_swsb(p, tgl_swsb_src_dep(swsb)); brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)); /* set message header global offset field (reg 0, element 2) */ + brw_set_default_exec_size(p, BRW_EXECUTE_1); + brw_set_default_swsb(p, tgl_swsb_null()); brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, mrf.nr, @@ -1899,6 +2098,7 @@ void brw_oword_block_write_scratch(struct brw_codegen *p, brw_imm_ud(offset)); brw_pop_insn_state(p); + brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1)); } { @@ -1908,6 +2108,7 @@ void brw_oword_block_write_scratch(struct brw_codegen *p, struct brw_reg src_header = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW); + brw_inst_set_sfid(devinfo, insn, target_cache); brw_inst_set_compression(devinfo, insn, false); if (brw_inst_exec_size(devinfo, insn) >= 16) @@ -1947,18 +2148,12 @@ void brw_oword_block_write_scratch(struct brw_codegen *p, else msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE; - brw_set_dp_write_message(p, - insn, - brw_scratch_surface_idx(p), - BRW_DATAPORT_OWORD_BLOCK_DWORDS(num_regs * 8), - msg_type, - target_cache, - mlen, - true, /* header_present */ - 0, /* not a render target */ - send_commit_msg, /* response_length */ - 0, /* eot */ - send_commit_msg); + brw_set_desc(p, insn, + brw_message_desc(devinfo, mlen, send_commit_msg, true) | + brw_dp_write_desc(devinfo, 
brw_scratch_surface_idx(p), + BRW_DATAPORT_OWORD_BLOCK_DWORDS(num_regs * 8), + msg_type, 0, /* not a render target */ + send_commit_msg)); } } @@ -1978,6 +2173,7 @@ brw_oword_block_read_scratch(struct brw_codegen *p, unsigned offset) { const struct gen_device_info *devinfo = p->devinfo; + const struct tgl_swsb swsb = brw_get_default_swsb(p); if (devinfo->gen >= 6) offset /= 16; @@ -2000,10 +2196,11 @@ brw_oword_block_read_scratch(struct brw_codegen *p, const unsigned target_cache = (devinfo->gen >= 7 ? GEN7_SFID_DATAPORT_DATA_CACHE : devinfo->gen >= 6 ? GEN6_SFID_DATAPORT_RENDER_CACHE : - BRW_DATAPORT_READ_TARGET_RENDER_CACHE); + BRW_SFID_DATAPORT_READ); { brw_push_insn_state(p); + brw_set_default_swsb(p, tgl_swsb_src_dep(swsb)); brw_set_default_exec_size(p, BRW_EXECUTE_8); brw_set_default_compression_control(p, BRW_COMPRESSION_NONE); brw_set_default_mask_control(p, BRW_MASK_DISABLE); @@ -2011,14 +2208,18 @@ brw_oword_block_read_scratch(struct brw_codegen *p, brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)); /* set message header global offset field (reg 0, element 2) */ + brw_set_default_exec_size(p, BRW_EXECUTE_1); + brw_set_default_swsb(p, tgl_swsb_null()); brw_MOV(p, get_element_ud(mrf, 2), brw_imm_ud(offset)); brw_pop_insn_state(p); + brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1)); } { brw_inst *insn = next_insn(p, BRW_OPCODE_SEND); + brw_inst_set_sfid(devinfo, insn, target_cache); assert(brw_inst_pred_control(devinfo, insn) == 0); brw_inst_set_compression(devinfo, insn, false); @@ -2030,15 +2231,12 @@ brw_oword_block_read_scratch(struct brw_codegen *p, brw_inst_set_base_mrf(devinfo, insn, mrf.nr); } - brw_set_dp_read_message(p, - insn, - brw_scratch_surface_idx(p), - BRW_DATAPORT_OWORD_BLOCK_DWORDS(num_regs * 8), - BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ - target_cache, - 1, /* msg_length */ - true, /* header_present */ - rlen); + brw_set_desc(p, insn, + brw_message_desc(devinfo, 1, rlen, true) | + 
brw_dp_read_desc(devinfo, brw_scratch_surface_idx(p), + BRW_DATAPORT_OWORD_BLOCK_DWORDS(num_regs * 8), + BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, + BRW_DATAPORT_READ_TARGET_RENDER_CACHE)); } } @@ -2090,8 +2288,9 @@ void brw_oword_block_read(struct brw_codegen *p, const struct gen_device_info *devinfo = p->devinfo; const unsigned target_cache = (devinfo->gen >= 6 ? GEN6_SFID_DATAPORT_CONSTANT_CACHE : - BRW_DATAPORT_READ_TARGET_DATA_CACHE); - const unsigned exec_size = 1 << brw_inst_exec_size(devinfo, p->current); + BRW_SFID_DATAPORT_READ); + const unsigned exec_size = 1 << brw_get_default_exec_size(p); + const struct tgl_swsb swsb = brw_get_default_swsb(p); /* On newer hardware, offset is in units of owords. */ if (devinfo->gen >= 6) @@ -2106,9 +2305,12 @@ void brw_oword_block_read(struct brw_codegen *p, brw_push_insn_state(p); brw_set_default_exec_size(p, BRW_EXECUTE_8); + brw_set_default_swsb(p, tgl_swsb_src_dep(swsb)); brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)); /* set message header global offset field (reg 0, element 2) */ + brw_set_default_exec_size(p, BRW_EXECUTE_1); + brw_set_default_swsb(p, tgl_swsb_null()); brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, mrf.nr, @@ -2116,8 +2318,12 @@ void brw_oword_block_read(struct brw_codegen *p, brw_imm_ud(offset)); brw_pop_insn_state(p); + brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1)); + brw_inst *insn = next_insn(p, BRW_OPCODE_SEND); + brw_inst_set_sfid(devinfo, insn, target_cache); + /* cast dest to a uword[8] vector */ dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW); @@ -2129,38 +2335,37 @@ void brw_oword_block_read(struct brw_codegen *p, brw_inst_set_base_mrf(devinfo, insn, mrf.nr); } - brw_set_dp_read_message(p, insn, bind_table_index, - BRW_DATAPORT_OWORD_BLOCK_DWORDS(exec_size), - BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, - target_cache, - 1, /* msg_length */ - true, /* header_present */ - DIV_ROUND_UP(exec_size, 8)); /* response_length */ + brw_set_desc(p, insn, + 
brw_message_desc(devinfo, 1, DIV_ROUND_UP(exec_size, 8), true) | + brw_dp_read_desc(devinfo, bind_table_index, + BRW_DATAPORT_OWORD_BLOCK_DWORDS(exec_size), + BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, + BRW_DATAPORT_READ_TARGET_DATA_CACHE)); brw_pop_insn_state(p); } - -void brw_fb_WRITE(struct brw_codegen *p, - struct brw_reg payload, - struct brw_reg implied_header, - unsigned msg_control, - unsigned binding_table_index, - unsigned msg_length, - unsigned response_length, - bool eot, - bool last_render_target, - bool header_present) +brw_inst * +brw_fb_WRITE(struct brw_codegen *p, + struct brw_reg payload, + struct brw_reg implied_header, + unsigned msg_control, + unsigned binding_table_index, + unsigned msg_length, + unsigned response_length, + bool eot, + bool last_render_target, + bool header_present) { const struct gen_device_info *devinfo = p->devinfo; const unsigned target_cache = (devinfo->gen >= 6 ? GEN6_SFID_DATAPORT_RENDER_CACHE : - BRW_DATAPORT_READ_TARGET_RENDER_CACHE); + BRW_SFID_DATAPORT_WRITE); brw_inst *insn; unsigned msg_type; struct brw_reg dest, src0; - if (brw_inst_exec_size(devinfo, p->current) >= BRW_EXECUTE_16) + if (brw_get_default_exec_size(p) >= BRW_EXECUTE_16) dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW); else dest = retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW); @@ -2170,6 +2375,7 @@ void brw_fb_WRITE(struct brw_codegen *p, } else { insn = next_insn(p, BRW_OPCODE_SEND); } + brw_inst_set_sfid(devinfo, insn, target_cache); brw_inst_set_compression(devinfo, insn, false); if (devinfo->gen >= 6) { @@ -2187,18 +2393,15 @@ void brw_fb_WRITE(struct brw_codegen *p, brw_set_dest(p, insn, dest); brw_set_src0(p, insn, src0); - brw_set_dp_write_message(p, - insn, - binding_table_index, - msg_control, - msg_type, - target_cache, - msg_length, - header_present, - last_render_target, - response_length, - eot, - 0 /* send_commit_msg */); + brw_set_desc(p, insn, + brw_message_desc(devinfo, msg_length, response_length, + 
header_present) | + brw_dp_write_desc(devinfo, binding_table_index, msg_control, + msg_type, last_render_target, + 0 /* send_commit_msg */)); + brw_inst_set_eot(devinfo, insn, eot); + + return insn; } brw_inst * @@ -2213,19 +2416,20 @@ gen9_fb_READ(struct brw_codegen *p, const struct gen_device_info *devinfo = p->devinfo; assert(devinfo->gen >= 9); const unsigned msg_subtype = - brw_inst_exec_size(devinfo, p->current) == BRW_EXECUTE_16 ? 0 : 1; + brw_get_default_exec_size(p) == BRW_EXECUTE_16 ? 0 : 1; brw_inst *insn = next_insn(p, BRW_OPCODE_SENDC); + brw_inst_set_sfid(devinfo, insn, GEN6_SFID_DATAPORT_RENDER_CACHE); brw_set_dest(p, insn, dst); brw_set_src0(p, insn, payload); - brw_set_dp_read_message(p, insn, binding_table_index, - per_sample << 5 | msg_subtype, - GEN9_DATAPORT_RC_RENDER_TARGET_READ, - GEN6_SFID_DATAPORT_RENDER_CACHE, - msg_length, true /* header_present */, - response_length); - brw_inst_set_rt_slot_group(devinfo, insn, - brw_inst_qtr_control(devinfo, p->current) / 2); + brw_set_desc( + p, insn, + brw_message_desc(devinfo, msg_length, response_length, true) | + brw_dp_read_desc(devinfo, binding_table_index, + per_sample << 5 | msg_subtype, + GEN9_DATAPORT_RC_RENDER_TARGET_READ, + BRW_DATAPORT_READ_TARGET_RENDER_CACHE)); + brw_inst_set_rt_slot_group(devinfo, insn, brw_get_default_group(p) / 16); return insn; } @@ -2255,6 +2459,7 @@ void brw_SAMPLE(struct brw_codegen *p, gen6_resolve_implied_move(p, &src0, msg_reg_nr); insn = next_insn(p, BRW_OPCODE_SEND); + brw_inst_set_sfid(devinfo, insn, BRW_SFID_SAMPLER); brw_inst_set_pred_control(devinfo, insn, BRW_PREDICATE_NONE); /* XXX */ /* From the 965 PRM (volume 4, part 1, section 14.2.41): @@ -2276,15 +2481,11 @@ void brw_SAMPLE(struct brw_codegen *p, brw_set_dest(p, insn, dest); brw_set_src0(p, insn, src0); - brw_set_sampler_message(p, insn, - binding_table_index, - sampler, - msg_type, - response_length, - msg_length, - header_present, - simd_mode, - return_format); + brw_set_desc(p, insn, + 
brw_message_desc(devinfo, msg_length, response_length, + header_present) | + brw_sampler_desc(devinfo, binding_table_index, sampler, + msg_type, simd_mode, return_format)); } /* Adjust the message header's sampler state pointer to @@ -2324,12 +2525,15 @@ void brw_adjust_sampler_state_pointer(struct brw_codegen *p, struct brw_reg temp = get_element_ud(header, 3); + brw_push_insn_state(p); brw_AND(p, temp, get_element_ud(sampler_index, 0), brw_imm_ud(0x0f0)); + brw_set_default_swsb(p, tgl_swsb_regdist(1)); brw_SHL(p, temp, temp, brw_imm_ud(4)); brw_ADD(p, get_element_ud(header, 3), get_element_ud(brw_vec8_grf(0, 0), 3), temp); + brw_pop_insn_state(p); } } @@ -2357,6 +2561,7 @@ void brw_urb_WRITE(struct brw_codegen *p, brw_push_insn_state(p); brw_set_default_access_mode(p, BRW_ALIGN_1); brw_set_default_mask_control(p, BRW_MASK_DISABLE); + brw_set_default_exec_size(p, BRW_EXECUTE_1); brw_OR(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 5), BRW_REGISTER_TYPE_UD), retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD), @@ -2384,103 +2589,210 @@ void brw_urb_WRITE(struct brw_codegen *p, swizzle); } -struct brw_inst * +void brw_send_indirect_message(struct brw_codegen *p, unsigned sfid, struct brw_reg dst, struct brw_reg payload, - struct brw_reg desc) + struct brw_reg desc, + unsigned desc_imm, + bool eot) { const struct gen_device_info *devinfo = p->devinfo; struct brw_inst *send; - int setup; dst = retype(dst, BRW_REGISTER_TYPE_UW); assert(desc.type == BRW_REGISTER_TYPE_UD); - /* We hold on to the setup instruction (the SEND in the direct case, the OR - * in the indirect case) by its index in the instruction store. The - * pointer returned by next_insn() may become invalid if emitting the SEND - * in the indirect case reallocs the store. 
- */ - if (desc.file == BRW_IMMEDIATE_VALUE) { - setup = p->nr_insn; send = next_insn(p, BRW_OPCODE_SEND); - brw_set_src1(p, send, desc); - + brw_set_src0(p, send, retype(payload, BRW_REGISTER_TYPE_UD)); + brw_set_desc(p, send, desc.ud | desc_imm); } else { + const struct tgl_swsb swsb = brw_get_default_swsb(p); struct brw_reg addr = retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD); brw_push_insn_state(p); brw_set_default_access_mode(p, BRW_ALIGN_1); brw_set_default_mask_control(p, BRW_MASK_DISABLE); + brw_set_default_exec_size(p, BRW_EXECUTE_1); brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); + brw_set_default_swsb(p, tgl_swsb_src_dep(swsb)); /* Load the indirect descriptor to an address register using OR so the - * caller can specify additional descriptor bits with the usual - * brw_set_*_message() helper functions. + * caller can specify additional descriptor bits with the desc_imm + * immediate. */ - setup = p->nr_insn; - brw_OR(p, addr, desc, brw_imm_ud(0)); + brw_OR(p, addr, desc, brw_imm_ud(desc_imm)); brw_pop_insn_state(p); + brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1)); send = next_insn(p, BRW_OPCODE_SEND); - brw_set_src1(p, send, addr); - } + brw_set_src0(p, send, retype(payload, BRW_REGISTER_TYPE_UD)); - if (dst.width < BRW_EXECUTE_8) - brw_inst_set_exec_size(devinfo, send, dst.width); + if (devinfo->gen >= 12) + brw_inst_set_send_sel_reg32_desc(devinfo, send, true); + else + brw_set_src1(p, send, addr); + } brw_set_dest(p, send, dst); - brw_set_src0(p, send, retype(payload, BRW_REGISTER_TYPE_UD)); brw_inst_set_sfid(devinfo, send, sfid); + brw_inst_set_eot(devinfo, send, eot); +} + +void +brw_send_indirect_split_message(struct brw_codegen *p, + unsigned sfid, + struct brw_reg dst, + struct brw_reg payload0, + struct brw_reg payload1, + struct brw_reg desc, + unsigned desc_imm, + struct brw_reg ex_desc, + unsigned ex_desc_imm, + bool eot) +{ + const struct gen_device_info *devinfo = p->devinfo; + struct brw_inst *send; + + dst = 
retype(dst, BRW_REGISTER_TYPE_UW); + + assert(desc.type == BRW_REGISTER_TYPE_UD); + + if (desc.file == BRW_IMMEDIATE_VALUE) { + desc.ud |= desc_imm; + } else { + const struct tgl_swsb swsb = brw_get_default_swsb(p); + struct brw_reg addr = retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD); + + brw_push_insn_state(p); + brw_set_default_access_mode(p, BRW_ALIGN_1); + brw_set_default_mask_control(p, BRW_MASK_DISABLE); + brw_set_default_exec_size(p, BRW_EXECUTE_1); + brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); + brw_set_default_swsb(p, tgl_swsb_src_dep(swsb)); + + /* Load the indirect descriptor to an address register using OR so the + * caller can specify additional descriptor bits with the desc_imm + * immediate. + */ + brw_OR(p, addr, desc, brw_imm_ud(desc_imm)); + + brw_pop_insn_state(p); + desc = addr; + + brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1)); + } + + if (ex_desc.file == BRW_IMMEDIATE_VALUE && + (devinfo->gen >= 12 || (ex_desc.ud & INTEL_MASK(15, 12)) == 0)) { + ex_desc.ud |= ex_desc_imm; + } else { + const struct tgl_swsb swsb = brw_get_default_swsb(p); + struct brw_reg addr = retype(brw_address_reg(2), BRW_REGISTER_TYPE_UD); + + brw_push_insn_state(p); + brw_set_default_access_mode(p, BRW_ALIGN_1); + brw_set_default_mask_control(p, BRW_MASK_DISABLE); + brw_set_default_exec_size(p, BRW_EXECUTE_1); + brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); + brw_set_default_swsb(p, tgl_swsb_src_dep(swsb)); + + /* Load the indirect extended descriptor to an address register using OR + * so the caller can specify additional descriptor bits with the + * desc_imm immediate. + * + * Even though the instruction dispatcher always pulls the SFID and EOT + * fields from the instruction itself, actual external unit which + * processes the message gets the SFID and EOT from the extended + * descriptor which comes from the address register. If we don't OR + * those two bits in, the external unit may get confused and hang. 
+ */ + unsigned imm_part = ex_desc_imm | sfid | eot << 5; + + if (ex_desc.file == BRW_IMMEDIATE_VALUE) { + /* ex_desc bits 15:12 don't exist in the instruction encoding prior + * to Gen12, so we may have fallen back to an indirect extended + * descriptor. + */ + brw_MOV(p, addr, brw_imm_ud(ex_desc.ud | imm_part)); + } else { + brw_OR(p, addr, ex_desc, brw_imm_ud(imm_part)); + } + + brw_pop_insn_state(p); + ex_desc = addr; + + brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1)); + } + + send = next_insn(p, devinfo->gen >= 12 ? BRW_OPCODE_SEND : BRW_OPCODE_SENDS); + brw_set_dest(p, send, dst); + brw_set_src0(p, send, retype(payload0, BRW_REGISTER_TYPE_UD)); + brw_set_src1(p, send, retype(payload1, BRW_REGISTER_TYPE_UD)); + + if (desc.file == BRW_IMMEDIATE_VALUE) { + brw_inst_set_send_sel_reg32_desc(devinfo, send, 0); + brw_inst_set_send_desc(devinfo, send, desc.ud); + } else { + assert(desc.file == BRW_ARCHITECTURE_REGISTER_FILE); + assert(desc.nr == BRW_ARF_ADDRESS); + assert(desc.subnr == 0); + brw_inst_set_send_sel_reg32_desc(devinfo, send, 1); + } + + if (ex_desc.file == BRW_IMMEDIATE_VALUE) { + brw_inst_set_send_sel_reg32_ex_desc(devinfo, send, 0); + brw_inst_set_sends_ex_desc(devinfo, send, ex_desc.ud); + } else { + assert(ex_desc.file == BRW_ARCHITECTURE_REGISTER_FILE); + assert(ex_desc.nr == BRW_ARF_ADDRESS); + assert((ex_desc.subnr & 0x3) == 0); + brw_inst_set_send_sel_reg32_ex_desc(devinfo, send, 1); + brw_inst_set_send_ex_desc_ia_subreg_nr(devinfo, send, ex_desc.subnr >> 2); + } - return &p->store[setup]; + brw_inst_set_sfid(devinfo, send, sfid); + brw_inst_set_eot(devinfo, send, eot); } -static struct brw_inst * +static void brw_send_indirect_surface_message(struct brw_codegen *p, unsigned sfid, struct brw_reg dst, struct brw_reg payload, struct brw_reg surface, - unsigned message_len, - unsigned response_len, - bool header_present) + unsigned desc_imm) { - const struct gen_device_info *devinfo = p->devinfo; - struct brw_inst *insn; - if (surface.file != 
BRW_IMMEDIATE_VALUE) { + const struct tgl_swsb swsb = brw_get_default_swsb(p); struct brw_reg addr = retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD); brw_push_insn_state(p); brw_set_default_access_mode(p, BRW_ALIGN_1); brw_set_default_mask_control(p, BRW_MASK_DISABLE); + brw_set_default_exec_size(p, BRW_EXECUTE_1); brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); + brw_set_default_swsb(p, tgl_swsb_src_dep(swsb)); /* Mask out invalid bits from the surface index to avoid hangs e.g. when * some surface array is accessed out of bounds. */ - insn = brw_AND(p, addr, - suboffset(vec1(retype(surface, BRW_REGISTER_TYPE_UD)), - BRW_GET_SWZ(surface.swizzle, 0)), - brw_imm_ud(0xff)); + brw_AND(p, addr, + suboffset(vec1(retype(surface, BRW_REGISTER_TYPE_UD)), + BRW_GET_SWZ(surface.swizzle, 0)), + brw_imm_ud(0xff)); brw_pop_insn_state(p); surface = addr; + brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1)); } - insn = brw_send_indirect_message(p, sfid, dst, payload, surface); - brw_inst_set_mlen(devinfo, insn, message_len); - brw_inst_set_rlen(devinfo, insn, response_len); - brw_inst_set_header_present(devinfo, insn, header_present); - - return insn; + brw_send_indirect_message(p, sfid, dst, payload, surface, desc_imm, false); } static bool @@ -2529,6 +2841,8 @@ brw_find_next_block_end(struct brw_codegen *p, int start_offset) case BRW_OPCODE_HALT: if (depth == 0) return offset; + default: + break; } } @@ -2634,6 +2948,9 @@ brw_set_uip_jip(struct brw_codegen *p, int start_offset) assert(brw_inst_uip(devinfo, insn) != 0); assert(brw_inst_jip(devinfo, insn) != 0); break; + + default: + break; } } } @@ -2689,75 +3006,35 @@ brw_svb_write(struct brw_codegen *p, const unsigned target_cache = (devinfo->gen >= 7 ? GEN7_SFID_DATAPORT_DATA_CACHE : devinfo->gen >= 6 ? 
GEN6_SFID_DATAPORT_RENDER_CACHE : - BRW_DATAPORT_READ_TARGET_RENDER_CACHE); + BRW_SFID_DATAPORT_WRITE); brw_inst *insn; gen6_resolve_implied_move(p, &src0, msg_reg_nr); insn = next_insn(p, BRW_OPCODE_SEND); + brw_inst_set_sfid(devinfo, insn, target_cache); brw_set_dest(p, insn, dest); brw_set_src0(p, insn, src0); - brw_set_src1(p, insn, brw_imm_d(0)); - brw_set_dp_write_message(p, insn, - binding_table_index, - 0, /* msg_control: ignored */ - GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE, - target_cache, - 1, /* msg_length */ - true, /* header_present */ - 0, /* last_render_target: ignored */ - send_commit_msg, /* response_length */ - 0, /* end_of_thread */ - send_commit_msg); /* send_commit_msg */ + brw_set_desc(p, insn, + brw_message_desc(devinfo, 1, send_commit_msg, true) | + brw_dp_write_desc(devinfo, binding_table_index, + 0, /* msg_control: ignored */ + GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE, + 0, /* last_render_target: ignored */ + send_commit_msg)); /* send_commit_msg */ } static unsigned brw_surface_payload_size(struct brw_codegen *p, unsigned num_channels, - bool has_simd4x2, - bool has_simd16) -{ - if (has_simd4x2 && - brw_inst_access_mode(p->devinfo, p->current) == BRW_ALIGN_16) - return 1; - else if (has_simd16 && - brw_inst_exec_size(p->devinfo, p->current) == BRW_EXECUTE_16) - return 2 * num_channels; - else - return num_channels; -} - -static void -brw_set_dp_untyped_atomic_message(struct brw_codegen *p, - brw_inst *insn, - unsigned atomic_op, - bool response_expected) + unsigned exec_size /**< 0 for SIMD4x2 */) { - const struct gen_device_info *devinfo = p->devinfo; - unsigned msg_control = - atomic_op | /* Atomic Operation Type: BRW_AOP_* */ - (response_expected ? 
1 << 5 : 0); /* Return data expected */ - - if (devinfo->gen >= 8 || devinfo->is_haswell) { - if (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1) { - if (brw_inst_exec_size(devinfo, p->current) != BRW_EXECUTE_16) - msg_control |= 1 << 4; /* SIMD8 mode */ - - brw_inst_set_dp_msg_type(devinfo, insn, - HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP); - } else { - brw_inst_set_dp_msg_type(devinfo, insn, - HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2); - } - } else { - brw_inst_set_dp_msg_type(devinfo, insn, - GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP); - - if (brw_inst_exec_size(devinfo, p->current) != BRW_EXECUTE_16) - msg_control |= 1 << 4; /* SIMD8 mode */ - } - - brw_inst_set_dp_msg_control(devinfo, insn, msg_control); + if (exec_size == 0) + return 1; /* SIMD4x2 */ + else if (exec_size <= 8) + return num_channels; + else + return 2 * num_channels; } void @@ -2767,13 +3044,24 @@ brw_untyped_atomic(struct brw_codegen *p, struct brw_reg surface, unsigned atomic_op, unsigned msg_length, - bool response_expected) + bool response_expected, + bool header_present) { const struct gen_device_info *devinfo = p->devinfo; const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ? HSW_SFID_DATAPORT_DATA_CACHE_1 : GEN7_SFID_DATAPORT_DATA_CACHE); - const bool align1 = brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1; + const bool align1 = brw_get_default_access_mode(p) == BRW_ALIGN_1; + /* SIMD4x2 untyped atomic instructions only exist on HSW+ */ + const bool has_simd4x2 = devinfo->gen >= 8 || devinfo->is_haswell; + const unsigned exec_size = align1 ? 1 << brw_get_default_exec_size(p) : + has_simd4x2 ? 
0 : 8; + const unsigned response_length = + brw_surface_payload_size(p, response_expected, exec_size); + const unsigned desc = + brw_message_desc(devinfo, msg_length, response_length, header_present) | + brw_dp_untyped_atomic_desc(devinfo, exec_size, atomic_op, + response_expected); /* Mask out unused components -- This is especially important in Align16 * mode on generations that don't have native support for SIMD4x2 atomics, * because unused but enabled components will cause the dataport to perform @@ -2781,37 +3069,9 @@ brw_untyped_atomic(struct brw_codegen *p, * uninitialized Y, Z and W coordinates of the payload. */ const unsigned mask = align1 ? WRITEMASK_XYZW : WRITEMASK_X; - struct brw_inst *insn = brw_send_indirect_surface_message( - p, sfid, brw_writemask(dst, mask), payload, surface, msg_length, - brw_surface_payload_size(p, response_expected, - devinfo->gen >= 8 || devinfo->is_haswell, true), - align1); - - brw_set_dp_untyped_atomic_message( - p, insn, atomic_op, response_expected); -} - -static void -brw_set_dp_untyped_surface_read_message(struct brw_codegen *p, - struct brw_inst *insn, - unsigned num_channels) -{ - const struct gen_device_info *devinfo = p->devinfo; - /* Set mask of 32-bit channels to drop. */ - unsigned msg_control = 0xf & (0xf << num_channels); - if (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1) { - if (brw_inst_exec_size(devinfo, p->current) == BRW_EXECUTE_16) - msg_control |= 1 << 4; /* SIMD16 mode */ - else - msg_control |= 2 << 4; /* SIMD8 mode */ - } - - brw_inst_set_dp_msg_type(devinfo, insn, - (devinfo->gen >= 8 || devinfo->is_haswell ? 
- HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ : - GEN7_DATAPORT_DC_UNTYPED_SURFACE_READ)); - brw_inst_set_dp_msg_control(devinfo, insn, msg_control); + brw_send_indirect_surface_message(p, sfid, brw_writemask(dst, mask), + payload, surface, desc); } void @@ -2826,41 +3086,15 @@ brw_untyped_surface_read(struct brw_codegen *p, const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ? HSW_SFID_DATAPORT_DATA_CACHE_1 : GEN7_SFID_DATAPORT_DATA_CACHE); - struct brw_inst *insn = brw_send_indirect_surface_message( - p, sfid, dst, payload, surface, msg_length, - brw_surface_payload_size(p, num_channels, true, true), - false); - - brw_set_dp_untyped_surface_read_message( - p, insn, num_channels); -} - -static void -brw_set_dp_untyped_surface_write_message(struct brw_codegen *p, - struct brw_inst *insn, - unsigned num_channels) -{ - const struct gen_device_info *devinfo = p->devinfo; - /* Set mask of 32-bit channels to drop. */ - unsigned msg_control = 0xf & (0xf << num_channels); - - if (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1) { - if (brw_inst_exec_size(devinfo, p->current) == BRW_EXECUTE_16) - msg_control |= 1 << 4; /* SIMD16 mode */ - else - msg_control |= 2 << 4; /* SIMD8 mode */ - } else { - if (devinfo->gen >= 8 || devinfo->is_haswell) - msg_control |= 0 << 4; /* SIMD4x2 mode */ - else - msg_control |= 2 << 4; /* SIMD8 mode */ - } + const bool align1 = brw_get_default_access_mode(p) == BRW_ALIGN_1; + const unsigned exec_size = align1 ? 1 << brw_get_default_exec_size(p) : 0; + const unsigned response_length = + brw_surface_payload_size(p, num_channels, exec_size); + const unsigned desc = + brw_message_desc(devinfo, msg_length, response_length, false) | + brw_dp_untyped_surface_rw_desc(devinfo, exec_size, num_channels, false); - brw_inst_set_dp_msg_type(devinfo, insn, - devinfo->gen >= 8 || devinfo->is_haswell ? 
- HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_WRITE : - GEN7_DATAPORT_DC_UNTYPED_SURFACE_WRITE); - brw_inst_set_dp_msg_control(devinfo, insn, msg_control); + brw_send_indirect_surface_message(p, sfid, dst, payload, surface, desc); } void @@ -2868,206 +3102,41 @@ brw_untyped_surface_write(struct brw_codegen *p, struct brw_reg payload, struct brw_reg surface, unsigned msg_length, - unsigned num_channels) + unsigned num_channels, + bool header_present) { const struct gen_device_info *devinfo = p->devinfo; const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ? HSW_SFID_DATAPORT_DATA_CACHE_1 : GEN7_SFID_DATAPORT_DATA_CACHE); - const bool align1 = brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1; - /* Mask out unused components -- See comment in brw_untyped_atomic(). */ - const unsigned mask = devinfo->gen == 7 && !devinfo->is_haswell && !align1 ? - WRITEMASK_X : WRITEMASK_XYZW; - struct brw_inst *insn = brw_send_indirect_surface_message( - p, sfid, brw_writemask(brw_null_reg(), mask), - payload, surface, msg_length, 0, align1); - - brw_set_dp_untyped_surface_write_message( - p, insn, num_channels); -} - -static void -brw_set_dp_typed_atomic_message(struct brw_codegen *p, - struct brw_inst *insn, - unsigned atomic_op, - bool response_expected) -{ - const struct gen_device_info *devinfo = p->devinfo; - unsigned msg_control = - atomic_op | /* Atomic Operation Type: BRW_AOP_* */ - (response_expected ? 
1 << 5 : 0); /* Return data expected */ - - if (devinfo->gen >= 8 || devinfo->is_haswell) { - if (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1) { - if (brw_inst_qtr_control(devinfo, p->current) % 2 == 1) - msg_control |= 1 << 4; /* Use high 8 slots of the sample mask */ - - brw_inst_set_dp_msg_type(devinfo, insn, - HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP); - } else { - brw_inst_set_dp_msg_type(devinfo, insn, - HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP_SIMD4X2); - } - - } else { - brw_inst_set_dp_msg_type(devinfo, insn, - GEN7_DATAPORT_RC_TYPED_ATOMIC_OP); - - if (brw_inst_qtr_control(devinfo, p->current) % 2 == 1) - msg_control |= 1 << 4; /* Use high 8 slots of the sample mask */ - } - - brw_inst_set_dp_msg_control(devinfo, insn, msg_control); -} - -void -brw_typed_atomic(struct brw_codegen *p, - struct brw_reg dst, - struct brw_reg payload, - struct brw_reg surface, - unsigned atomic_op, - unsigned msg_length, - bool response_expected) { - const struct gen_device_info *devinfo = p->devinfo; - const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ? - HSW_SFID_DATAPORT_DATA_CACHE_1 : - GEN6_SFID_DATAPORT_RENDER_CACHE); - const bool align1 = (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1); - /* Mask out unused components -- See comment in brw_untyped_atomic(). */ - const unsigned mask = align1 ? WRITEMASK_XYZW : WRITEMASK_X; - struct brw_inst *insn = brw_send_indirect_surface_message( - p, sfid, brw_writemask(dst, mask), payload, surface, msg_length, - brw_surface_payload_size(p, response_expected, - devinfo->gen >= 8 || devinfo->is_haswell, false), - true); - - brw_set_dp_typed_atomic_message( - p, insn, atomic_op, response_expected); -} - -static void -brw_set_dp_typed_surface_read_message(struct brw_codegen *p, - struct brw_inst *insn, - unsigned num_channels) -{ - const struct gen_device_info *devinfo = p->devinfo; - /* Set mask of unused channels. 
*/ - unsigned msg_control = 0xf & (0xf << num_channels); - - if (devinfo->gen >= 8 || devinfo->is_haswell) { - if (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1) { - if (brw_inst_qtr_control(devinfo, p->current) % 2 == 1) - msg_control |= 2 << 4; /* Use high 8 slots of the sample mask */ - else - msg_control |= 1 << 4; /* Use low 8 slots of the sample mask */ - } - - brw_inst_set_dp_msg_type(devinfo, insn, - HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_READ); - } else { - if (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1) { - if (brw_inst_qtr_control(devinfo, p->current) % 2 == 1) - msg_control |= 1 << 5; /* Use high 8 slots of the sample mask */ - } - - brw_inst_set_dp_msg_type(devinfo, insn, - GEN7_DATAPORT_RC_TYPED_SURFACE_READ); - } - - brw_inst_set_dp_msg_control(devinfo, insn, msg_control); -} - -void -brw_typed_surface_read(struct brw_codegen *p, - struct brw_reg dst, - struct brw_reg payload, - struct brw_reg surface, - unsigned msg_length, - unsigned num_channels) -{ - const struct gen_device_info *devinfo = p->devinfo; - const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ? - HSW_SFID_DATAPORT_DATA_CACHE_1 : - GEN6_SFID_DATAPORT_RENDER_CACHE); - struct brw_inst *insn = brw_send_indirect_surface_message( - p, sfid, dst, payload, surface, msg_length, - brw_surface_payload_size(p, num_channels, - devinfo->gen >= 8 || devinfo->is_haswell, false), - true); - - brw_set_dp_typed_surface_read_message( - p, insn, num_channels); -} - -static void -brw_set_dp_typed_surface_write_message(struct brw_codegen *p, - struct brw_inst *insn, - unsigned num_channels) -{ - const struct gen_device_info *devinfo = p->devinfo; - /* Set mask of unused channels. 
*/ - unsigned msg_control = 0xf & (0xf << num_channels); - - if (devinfo->gen >= 8 || devinfo->is_haswell) { - if (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1) { - if (brw_inst_qtr_control(devinfo, p->current) % 2 == 1) - msg_control |= 2 << 4; /* Use high 8 slots of the sample mask */ - else - msg_control |= 1 << 4; /* Use low 8 slots of the sample mask */ - } - - brw_inst_set_dp_msg_type(devinfo, insn, - HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_WRITE); - - } else { - if (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1) { - if (brw_inst_qtr_control(devinfo, p->current) % 2 == 1) - msg_control |= 1 << 5; /* Use high 8 slots of the sample mask */ - } - - brw_inst_set_dp_msg_type(devinfo, insn, - GEN7_DATAPORT_RC_TYPED_SURFACE_WRITE); - } - - brw_inst_set_dp_msg_control(devinfo, insn, msg_control); -} - -void -brw_typed_surface_write(struct brw_codegen *p, - struct brw_reg payload, - struct brw_reg surface, - unsigned msg_length, - unsigned num_channels) -{ - const struct gen_device_info *devinfo = p->devinfo; - const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ? - HSW_SFID_DATAPORT_DATA_CACHE_1 : - GEN6_SFID_DATAPORT_RENDER_CACHE); - const bool align1 = (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1); + const bool align1 = brw_get_default_access_mode(p) == BRW_ALIGN_1; + /* SIMD4x2 untyped surface write instructions only exist on HSW+ */ + const bool has_simd4x2 = devinfo->gen >= 8 || devinfo->is_haswell; + const unsigned exec_size = align1 ? 1 << brw_get_default_exec_size(p) : + has_simd4x2 ? 0 : 8; + const unsigned desc = + brw_message_desc(devinfo, msg_length, 0, header_present) | + brw_dp_untyped_surface_rw_desc(devinfo, exec_size, num_channels, true); /* Mask out unused components -- See comment in brw_untyped_atomic(). */ - const unsigned mask = (devinfo->gen == 7 && !devinfo->is_haswell && !align1 ? 
- WRITEMASK_X : WRITEMASK_XYZW); - struct brw_inst *insn = brw_send_indirect_surface_message( - p, sfid, brw_writemask(brw_null_reg(), mask), - payload, surface, msg_length, 0, true); + const unsigned mask = !has_simd4x2 && !align1 ? WRITEMASK_X : WRITEMASK_XYZW; - brw_set_dp_typed_surface_write_message( - p, insn, num_channels); + brw_send_indirect_surface_message(p, sfid, brw_writemask(brw_null_reg(), mask), + payload, surface, desc); } static void brw_set_memory_fence_message(struct brw_codegen *p, struct brw_inst *insn, enum brw_message_target sfid, - bool commit_enable) + bool commit_enable, + unsigned bti) { const struct gen_device_info *devinfo = p->devinfo; - brw_set_message_descriptor(p, insn, sfid, - 1 /* message length */, - (commit_enable ? 1 : 0) /* response length */, - true /* header present */, - false); + brw_set_desc(p, insn, brw_message_desc( + devinfo, 1, (commit_enable ? 1 : 0), true)); + + brw_inst_set_sfid(devinfo, insn, sfid); switch (sfid) { case GEN6_SFID_DATAPORT_RENDER_CACHE: @@ -3082,51 +3151,34 @@ brw_set_memory_fence_message(struct brw_codegen *p, if (commit_enable) brw_inst_set_dp_msg_control(devinfo, insn, 1 << 5); + + assert(devinfo->gen >= 11 || bti == 0); + brw_inst_set_binding_table_index(devinfo, insn, bti); } void brw_memory_fence(struct brw_codegen *p, - struct brw_reg dst) + struct brw_reg dst, + struct brw_reg src, + enum opcode send_op, + enum brw_message_target sfid, + bool commit_enable, + unsigned bti) { const struct gen_device_info *devinfo = p->devinfo; - const bool commit_enable = devinfo->gen == 7 && !devinfo->is_haswell; - struct brw_inst *insn; - brw_push_insn_state(p); - brw_set_default_mask_control(p, BRW_MASK_DISABLE); - brw_set_default_exec_size(p, BRW_EXECUTE_1); - dst = vec1(dst); + dst = retype(vec1(dst), BRW_REGISTER_TYPE_UW); + src = retype(vec1(src), BRW_REGISTER_TYPE_UD); /* Set dst as destination for dependency tracking, the MEMORY_FENCE * message doesn't write anything back. 
*/ - insn = next_insn(p, BRW_OPCODE_SEND); - dst = retype(dst, BRW_REGISTER_TYPE_UW); + struct brw_inst *insn = next_insn(p, send_op); + brw_inst_set_mask_control(devinfo, insn, BRW_MASK_DISABLE); + brw_inst_set_exec_size(devinfo, insn, BRW_EXECUTE_1); brw_set_dest(p, insn, dst); - brw_set_src0(p, insn, dst); - brw_set_memory_fence_message(p, insn, GEN7_SFID_DATAPORT_DATA_CACHE, - commit_enable); - - if (devinfo->gen == 7 && !devinfo->is_haswell) { - /* IVB does typed surface access through the render cache, so we need to - * flush it too. Use a different register so both flushes can be - * pipelined by the hardware. - */ - insn = next_insn(p, BRW_OPCODE_SEND); - brw_set_dest(p, insn, offset(dst, 1)); - brw_set_src0(p, insn, offset(dst, 1)); - brw_set_memory_fence_message(p, insn, GEN6_SFID_DATAPORT_RENDER_CACHE, - commit_enable); - - /* Now write the response of the second message into the response of the - * first to trigger a pipeline stall -- This way future render and data - * cache messages will be properly ordered with respect to past data and - * render cache messages. - */ - brw_MOV(p, dst, offset(dst, 1)); - } - - brw_pop_insn_state(p); + brw_set_src0(p, insn, src); + brw_set_memory_fence_message(p, insn, sfid, commit_enable, bti); } void @@ -3140,24 +3192,24 @@ brw_pixel_interpolator_query(struct brw_codegen *p, unsigned response_length) { const struct gen_device_info *devinfo = p->devinfo; - struct brw_inst *insn; - const uint16_t exec_size = brw_inst_exec_size(devinfo, p->current); + const uint16_t exec_size = brw_get_default_exec_size(p); + const unsigned slot_group = brw_get_default_group(p) / 16; + const unsigned simd_mode = (exec_size == BRW_EXECUTE_16); + const unsigned desc = + brw_message_desc(devinfo, msg_length, response_length, false) | + brw_pixel_interp_desc(devinfo, mode, noperspective, simd_mode, + slot_group); /* brw_send_indirect_message will automatically use a direct send message * if data is actually immediate. 
*/ - insn = brw_send_indirect_message(p, - GEN7_SFID_PIXEL_INTERPOLATOR, - dest, - mrf, - vec1(data)); - brw_inst_set_mlen(devinfo, insn, msg_length); - brw_inst_set_rlen(devinfo, insn, response_length); - - brw_inst_set_pi_simd_mode(devinfo, insn, exec_size == BRW_EXECUTE_16); - brw_inst_set_pi_slot_group(devinfo, insn, 0); /* zero unless 32/64px dispatch */ - brw_inst_set_pi_nopersp(devinfo, insn, noperspective); - brw_inst_set_pi_message_type(devinfo, insn, mode); + brw_send_indirect_message(p, + GEN7_SFID_PIXEL_INTERPOLATOR, + dest, + mrf, + vec1(data), + desc, + false); } void @@ -3165,8 +3217,8 @@ brw_find_live_channel(struct brw_codegen *p, struct brw_reg dst, struct brw_reg mask) { const struct gen_device_info *devinfo = p->devinfo; - const unsigned exec_size = 1 << brw_inst_exec_size(devinfo, p->current); - const unsigned qtr_control = brw_inst_qtr_control(devinfo, p->current); + const unsigned exec_size = 1 << brw_get_default_exec_size(p); + const unsigned qtr_control = brw_get_default_group(p) / 8; brw_inst *inst; assert(devinfo->gen >= 7); @@ -3174,7 +3226,15 @@ brw_find_live_channel(struct brw_codegen *p, struct brw_reg dst, brw_push_insn_state(p); - if (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1) { + /* The flag register is only used on Gen7 in align1 mode, so avoid setting + * unnecessary bits in the instruction words, get the information we need + * and reset the default flag register. This allows more instructions to be + * compacted. 
+ */ + const unsigned flag_subreg = p->current->flag_subreg; + brw_set_default_flag_reg(p, 0, 0); + + if (brw_get_default_access_mode(p) == BRW_ALIGN_1) { brw_set_default_mask_control(p, BRW_MASK_DISABLE); if (devinfo->gen >= 8) { @@ -3187,6 +3247,7 @@ brw_find_live_channel(struct brw_codegen *p, struct brw_reg dst, struct brw_reg exec_mask = retype(brw_mask_reg(0), BRW_REGISTER_TYPE_UD); + brw_set_default_exec_size(p, BRW_EXECUTE_1); if (mask.file != BRW_IMMEDIATE_VALUE || mask.ud != 0xffffffff) { /* Unfortunately, ce0 does not take into account the thread * dispatch mask, which may be a problem in cases where it's not @@ -3196,6 +3257,7 @@ brw_find_live_channel(struct brw_codegen *p, struct brw_reg dst, * hardware. */ brw_SHR(p, vec1(dst), mask, brw_imm_ud(qtr_control * 8)); + brw_set_default_swsb(p, tgl_swsb_regdist(1)); brw_AND(p, vec1(dst), exec_mask, vec1(dst)); exec_mask = vec1(dst); } @@ -3206,8 +3268,9 @@ brw_find_live_channel(struct brw_codegen *p, struct brw_reg dst, */ inst = brw_FBL(p, vec1(dst), exec_mask); } else { - const struct brw_reg flag = brw_flag_reg(1, 0); + const struct brw_reg flag = brw_flag_subreg(flag_subreg); + brw_set_default_exec_size(p, BRW_EXECUTE_1); brw_MOV(p, retype(flag, BRW_REGISTER_TYPE_UD), brw_imm_ud(0)); /* Run enough instructions returning zero with execution masking and @@ -3224,8 +3287,9 @@ brw_find_live_channel(struct brw_codegen *p, struct brw_reg dst, brw_inst_set_mask_control(devinfo, inst, BRW_MASK_ENABLE); brw_inst_set_group(devinfo, inst, lower_size * i + 8 * qtr_control); brw_inst_set_cond_modifier(devinfo, inst, BRW_CONDITIONAL_Z); - brw_inst_set_flag_reg_nr(devinfo, inst, 1); brw_inst_set_exec_size(devinfo, inst, cvt(lower_size) - 1); + brw_inst_set_flag_reg_nr(devinfo, inst, flag_subreg / 2); + brw_inst_set_flag_subreg_nr(devinfo, inst, flag_subreg % 2); } /* Find the first bit set in the exec_size-wide portion of the flag @@ -3233,6 +3297,7 @@ brw_find_live_channel(struct brw_codegen *p, struct brw_reg dst, * 
instructions. */ const enum brw_reg_type type = brw_int_type(exec_size / 8, false); + brw_set_default_exec_size(p, BRW_EXECUTE_1); brw_FBL(p, vec1(dst), byte_offset(retype(flag, type), qtr_control)); } } else { @@ -3277,7 +3342,7 @@ brw_broadcast(struct brw_codegen *p, struct brw_reg idx) { const struct gen_device_info *devinfo = p->devinfo; - const bool align1 = brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1; + const bool align1 = brw_get_default_access_mode(p) == BRW_ALIGN_1; brw_inst *inst; brw_push_insn_state(p); @@ -3286,6 +3351,8 @@ brw_broadcast(struct brw_codegen *p, assert(src.file == BRW_GENERAL_REGISTER_FILE && src.address_mode == BRW_ADDRESS_DIRECT); + assert(!src.abs && !src.negate); + assert(src.type == dst.type); if ((src.vstride == 0 && (src.hstride == 0 || !align1)) || idx.file == BRW_IMMEDIATE_VALUE) { @@ -3298,10 +3365,24 @@ brw_broadcast(struct brw_codegen *p, (align1 ? stride(suboffset(src, i), 0, 1, 0) : stride(suboffset(src, 4 * i), 0, 4, 1))); } else { + /* From the Haswell PRM section "Register Region Restrictions": + * + * "The lower bits of the AddressImmediate must not overflow to + * change the register address. The lower 5 bits of Address + * Immediate when added to lower 5 bits of address register gives + * the sub-register offset. The upper bits of Address Immediate + * when added to upper bits of address register gives the register + * address. Any overflow from sub-register offset is dropped." + * + * Fortunately, for broadcast, we never have a sub-register offset so + * this isn't an issue. + */ + assert(src.subnr == 0); + if (align1) { const struct brw_reg addr = retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD); - const unsigned offset = src.nr * REG_SIZE + src.subnr; + unsigned offset = src.nr * REG_SIZE + src.subnr; /* Limit in bytes of the signed indirect addressing immediate. 
*/ const unsigned limit = 512; @@ -3312,22 +3393,49 @@ brw_broadcast(struct brw_codegen *p, /* Take into account the component size and horizontal stride. */ assert(src.vstride == src.hstride + src.width); brw_SHL(p, addr, vec1(idx), - brw_imm_ud(_mesa_logbase2(type_sz(src.type)) + + brw_imm_ud(util_logbase2(type_sz(src.type)) + src.hstride - 1)); /* We can only address up to limit bytes using the indirect * addressing immediate, account for the difference if the source * register is above this limit. */ - if (offset >= limit) + if (offset >= limit) { + brw_set_default_swsb(p, tgl_swsb_regdist(1)); brw_ADD(p, addr, addr, brw_imm_ud(offset - offset % limit)); + offset = offset % limit; + } brw_pop_insn_state(p); + brw_set_default_swsb(p, tgl_swsb_regdist(1)); + /* Use indirect addressing to fetch the specified component. */ - brw_MOV(p, dst, - retype(brw_vec1_indirect(addr.subnr, offset % limit), - src.type)); + if (type_sz(src.type) > 4 && + (devinfo->is_cherryview || gen_device_info_is_9lp(devinfo))) { + /* From the Cherryview PRM Vol 7. "Register Region Restrictions": + * + * "When source or destination datatype is 64b or operation is + * integer DWord multiply, indirect addressing must not be + * used." + * + * To work around both of this issue, we do two integer MOVs + * insead of one 64-bit MOV. Because no double value should ever + * cross a register boundary, it's safe to use the immediate + * offset in the indirect here to handle adding 4 bytes to the + * offset and avoid the extra ADD to the register file. 
+ */ + brw_MOV(p, subscript(dst, BRW_REGISTER_TYPE_D, 0), + retype(brw_vec1_indirect(addr.subnr, offset), + BRW_REGISTER_TYPE_D)); + brw_set_default_swsb(p, tgl_swsb_null()); + brw_MOV(p, subscript(dst, BRW_REGISTER_TYPE_D, 1), + retype(brw_vec1_indirect(addr.subnr, offset + 4), + BRW_REGISTER_TYPE_D)); + } else { + brw_MOV(p, dst, + retype(brw_vec1_indirect(addr.subnr, offset), src.type)); + } } else { /* In SIMD4x2 mode the index can be either zero or one, replicate it * to all bits of a flag register, @@ -3371,10 +3479,11 @@ void brw_shader_time_add(struct brw_codegen *p, struct brw_reg payload, uint32_t surf_index) { - const unsigned sfid = (p->devinfo->gen >= 8 || p->devinfo->is_haswell ? + const struct gen_device_info *devinfo = p->devinfo; + const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ? HSW_SFID_DATAPORT_DATA_CACHE_1 : GEN7_SFID_DATAPORT_DATA_CACHE); - assert(p->devinfo->gen >= 7); + assert(devinfo->gen >= 7); brw_push_insn_state(p); brw_set_default_access_mode(p, BRW_ALIGN_1); @@ -3389,10 +3498,12 @@ void brw_shader_time_add(struct brw_codegen *p, BRW_ARF_NULL, 0)); brw_set_src0(p, send, brw_vec1_reg(payload.file, payload.nr, 0)); - brw_set_src1(p, send, brw_imm_ud(0)); - brw_set_message_descriptor(p, send, sfid, 2, 0, false, false); - brw_inst_set_binding_table_index(p->devinfo, send, surf_index); - brw_set_dp_untyped_atomic_message(p, send, BRW_AOP_ADD, false); + brw_set_desc(p, send, (brw_message_desc(devinfo, 2, 0, false) | + brw_dp_untyped_atomic_desc(devinfo, 1, BRW_AOP_ADD, + false))); + + brw_inst_set_sfid(devinfo, send, sfid); + brw_inst_set_binding_table_index(devinfo, send, surf_index); brw_pop_insn_state(p); } @@ -3415,14 +3526,9 @@ brw_barrier(struct brw_codegen *p, struct brw_reg src) brw_set_dest(p, inst, retype(brw_null_reg(), BRW_REGISTER_TYPE_UW)); brw_set_src0(p, inst, src); brw_set_src1(p, inst, brw_null_reg()); + brw_set_desc(p, inst, brw_message_desc(devinfo, 1, 0, false)); - brw_set_message_descriptor(p, inst, 
BRW_SFID_MESSAGE_GATEWAY, - 1 /* msg_length */, - 0 /* response_length */, - false /* header_present */, - false /* end_of_thread */); - - brw_inst_set_gateway_notify(devinfo, inst, 1); + brw_inst_set_sfid(devinfo, inst, BRW_SFID_MESSAGE_GATEWAY); brw_inst_set_gateway_subfuncid(devinfo, inst, BRW_MESSAGE_GATEWAY_SFID_BARRIER_MSG); @@ -3450,3 +3556,36 @@ brw_WAIT(struct brw_codegen *p) brw_inst_set_exec_size(devinfo, insn, BRW_EXECUTE_1); brw_inst_set_mask_control(devinfo, insn, BRW_MASK_DISABLE); } + +void +brw_float_controls_mode(struct brw_codegen *p, + unsigned mode, unsigned mask) +{ + /* From the Skylake PRM, Volume 7, page 760: + * "Implementation Restriction on Register Access: When the control + * register is used as an explicit source and/or destination, hardware + * does not ensure execution pipeline coherency. Software must set the + * thread control field to ‘switch’ for an instruction that uses + * control register as an explicit operand." + * + * On Gen12+ this is implemented in terms of SWSB annotations instead. + */ + brw_set_default_swsb(p, tgl_swsb_regdist(1)); + + brw_inst *inst = brw_AND(p, brw_cr0_reg(0), brw_cr0_reg(0), + brw_imm_ud(~mask)); + brw_inst_set_exec_size(p->devinfo, inst, BRW_EXECUTE_1); + if (p->devinfo->gen < 12) + brw_inst_set_thread_control(p->devinfo, inst, BRW_THREAD_SWITCH); + + if (mode) { + brw_inst *inst_or = brw_OR(p, brw_cr0_reg(0), brw_cr0_reg(0), + brw_imm_ud(mode)); + brw_inst_set_exec_size(p->devinfo, inst_or, BRW_EXECUTE_1); + if (p->devinfo->gen < 12) + brw_inst_set_thread_control(p->devinfo, inst_or, BRW_THREAD_SWITCH); + } + + if (p->devinfo->gen >= 12) + brw_SYNC(p, TGL_SYNC_NOP); +}