X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fintel%2Fcompiler%2Fbrw_eu_emit.c;h=6cb4f7bdbf12c795857b43f4b557f5bf83628dc2;hb=95d8b4ac0bbef31aa99e21ee6ce18eb09c05d934;hp=06084dcdc507010f177c51a5a62b36721dfefc09;hpb=d6a6e103901507af0a4dc17052443dece430d0c1;p=mesa.git diff --git a/src/intel/compiler/brw_eu_emit.c b/src/intel/compiler/brw_eu_emit.c index 06084dcdc50..6cb4f7bdbf1 100644 --- a/src/intel/compiler/brw_eu_emit.c +++ b/src/intel/compiler/brw_eu_emit.c @@ -94,9 +94,32 @@ brw_set_dest(struct brw_codegen *p, brw_inst *inst, struct brw_reg dest) else if (dest.file == BRW_GENERAL_REGISTER_FILE) assert(dest.nr < 128); + /* The hardware has a restriction where if the destination is Byte, + * the instruction needs to have a stride of 2 (except for packed byte + * MOV). This seems to be required even if the destination is the NULL + * register. + */ + if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE && + dest.nr == BRW_ARF_NULL && + type_sz(dest.type) == 1) { + dest.hstride = BRW_HORIZONTAL_STRIDE_2; + } + gen7_convert_mrf_to_grf(p, &dest); - { + if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDS || + brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDSC) { + assert(dest.file == BRW_GENERAL_REGISTER_FILE || + dest.file == BRW_ARCHITECTURE_REGISTER_FILE); + assert(dest.address_mode == BRW_ADDRESS_DIRECT); + assert(dest.subnr % 16 == 0); + assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1 && + dest.vstride == dest.width + 1); + assert(!dest.negate && !dest.abs); + brw_inst_set_dst_da_reg_nr(devinfo, inst, dest.nr); + brw_inst_set_dst_da16_subreg_nr(devinfo, inst, dest.subnr / 16); + brw_inst_set_send_dst_reg_file(devinfo, inst, dest.file); + } else { brw_inst_set_dst_file_type(devinfo, inst, dest.file, dest.type); brw_inst_set_dst_address_mode(devinfo, inst, dest.address_mode); @@ -177,8 +200,11 @@ brw_set_src0(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg) gen7_convert_mrf_to_grf(p, &reg); - if (devinfo->gen >= 6 && (brw_inst_opcode(devinfo, 
inst) == BRW_OPCODE_SEND || - brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDC)) { + if (devinfo->gen >= 6 && + (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND || + brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDC || + brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDS || + brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDSC)) { /* Any source modifiers or regions will be ignored, since this just * identifies the MRF/GRF to start reading the message contents from. * Check for some likely failures. @@ -188,7 +214,17 @@ brw_set_src0(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg) assert(reg.address_mode == BRW_ADDRESS_DIRECT); } - { + if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDS || + brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDSC) { + assert(reg.file == BRW_GENERAL_REGISTER_FILE); + assert(reg.address_mode == BRW_ADDRESS_DIRECT); + assert(reg.subnr % 16 == 0); + assert(reg.hstride == BRW_HORIZONTAL_STRIDE_1 && + reg.vstride == reg.width + 1); + assert(!reg.negate && !reg.abs); + brw_inst_set_src0_da_reg_nr(devinfo, inst, reg.nr); + brw_inst_set_src0_da16_subreg_nr(devinfo, inst, reg.subnr / 16); + } else { brw_inst_set_src0_file_type(devinfo, inst, reg.file, reg.type); brw_inst_set_src0_abs(devinfo, inst, reg.abs); brw_inst_set_src0_negate(devinfo, inst, reg.negate); @@ -282,7 +318,18 @@ brw_set_src1(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg) if (reg.file == BRW_GENERAL_REGISTER_FILE) assert(reg.nr < 128); - { + if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDS || + brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDSC) { + assert(reg.file == BRW_GENERAL_REGISTER_FILE || + reg.file == BRW_ARCHITECTURE_REGISTER_FILE); + assert(reg.address_mode == BRW_ADDRESS_DIRECT); + assert(reg.subnr == 0); + assert(reg.hstride == BRW_HORIZONTAL_STRIDE_1 && + reg.vstride == reg.width + 1); + assert(!reg.negate && !reg.abs); + brw_inst_set_send_src1_reg_nr(devinfo, inst, reg.nr); + brw_inst_set_send_src1_reg_file(devinfo, inst, 
reg.file); + } else { /* From the IVB PRM Vol. 4, Pt. 3, Section 3.3.3.5: * * "Accumulator registers may be accessed explicitly as src0 @@ -660,9 +707,9 @@ brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest, gen7_convert_mrf_to_grf(p, &dest); assert(dest.nr < 128); - assert(src0.file != BRW_IMMEDIATE_VALUE || src0.nr < 128); - assert(src1.file != BRW_IMMEDIATE_VALUE || src1.nr < 128); - assert(src2.file != BRW_IMMEDIATE_VALUE || src2.nr < 128); + assert(src0.file == BRW_IMMEDIATE_VALUE || src0.nr < 128); + assert(src1.file != BRW_IMMEDIATE_VALUE && src1.nr < 128); + assert(src2.file == BRW_IMMEDIATE_VALUE || src2.nr < 128); assert(dest.address_mode == BRW_ADDRESS_DIRECT); assert(src0.address_mode == BRW_ADDRESS_DIRECT); assert(src1.address_mode == BRW_ADDRESS_DIRECT); @@ -761,7 +808,8 @@ brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest, assert(dest.type == BRW_REGISTER_TYPE_F || dest.type == BRW_REGISTER_TYPE_DF || dest.type == BRW_REGISTER_TYPE_D || - dest.type == BRW_REGISTER_TYPE_UD); + dest.type == BRW_REGISTER_TYPE_UD || + (dest.type == BRW_REGISTER_TYPE_HF && devinfo->gen >= 8)); if (devinfo->gen == 6) { brw_inst_set_3src_a16_dst_reg_file(devinfo, inst, dest.file == BRW_MESSAGE_REGISTER_FILE); @@ -806,6 +854,22 @@ brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest, */ brw_inst_set_3src_a16_src_type(devinfo, inst, dest.type); brw_inst_set_3src_a16_dst_type(devinfo, inst, dest.type); + + /* From the Bspec, 3D Media GPGPU, Instruction fields, srcType: + * + * "Three source instructions can use operands with mixed-mode + * precision. When SrcType field is set to :f or :hf it defines + * precision for source 0 only, and fields Src1Type and Src2Type + * define precision for other source operands: + * + * 0b = :f. Single precision Float (32-bit). + * 1b = :hf. Half precision Float (16-bit)." 
+ */ + if (src1.type == BRW_REGISTER_TYPE_HF) + brw_inst_set_3src_a16_src1_type(devinfo, inst, 1); + + if (src2.type == BRW_REGISTER_TYPE_HF) + brw_inst_set_3src_a16_src2_type(devinfo, inst, 1); } } @@ -1880,8 +1944,10 @@ void gen6_math(struct brw_codegen *p, assert(src1.file == BRW_GENERAL_REGISTER_FILE || (devinfo->gen >= 8 && src1.file == BRW_IMMEDIATE_VALUE)); } else { - assert(src0.type == BRW_REGISTER_TYPE_F); - assert(src1.type == BRW_REGISTER_TYPE_F); + assert(src0.type == BRW_REGISTER_TYPE_F || + (src0.type == BRW_REGISTER_TYPE_HF && devinfo->gen >= 9)); + assert(src1.type == BRW_REGISTER_TYPE_F || + (src1.type == BRW_REGISTER_TYPE_HF && devinfo->gen >= 9)); } /* Source modifiers are ignored for extended math instructions on Gen6. */ @@ -2445,7 +2511,8 @@ brw_send_indirect_message(struct brw_codegen *p, struct brw_reg dst, struct brw_reg payload, struct brw_reg desc, - unsigned desc_imm) + unsigned desc_imm, + bool eot) { const struct gen_device_info *devinfo = p->devinfo; struct brw_inst *send; @@ -2482,6 +2549,104 @@ brw_send_indirect_message(struct brw_codegen *p, brw_set_dest(p, send, dst); brw_set_src0(p, send, retype(payload, BRW_REGISTER_TYPE_UD)); brw_inst_set_sfid(devinfo, send, sfid); + brw_inst_set_eot(devinfo, send, eot); +} + +void +brw_send_indirect_split_message(struct brw_codegen *p, + unsigned sfid, + struct brw_reg dst, + struct brw_reg payload0, + struct brw_reg payload1, + struct brw_reg desc, + unsigned desc_imm, + struct brw_reg ex_desc, + unsigned ex_desc_imm, + bool eot) +{ + const struct gen_device_info *devinfo = p->devinfo; + struct brw_inst *send; + + dst = retype(dst, BRW_REGISTER_TYPE_UW); + + assert(desc.type == BRW_REGISTER_TYPE_UD); + + if (desc.file == BRW_IMMEDIATE_VALUE) { + desc.ud |= desc_imm; + } else { + struct brw_reg addr = retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD); + + brw_push_insn_state(p); + brw_set_default_access_mode(p, BRW_ALIGN_1); + brw_set_default_mask_control(p, BRW_MASK_DISABLE); + 
brw_set_default_exec_size(p, BRW_EXECUTE_1); + brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); + + /* Load the indirect descriptor to an address register using OR so the + * caller can specify additional descriptor bits with the desc_imm + * immediate. + */ + brw_OR(p, addr, desc, brw_imm_ud(desc_imm)); + + brw_pop_insn_state(p); + desc = addr; + } + + if (ex_desc.file == BRW_IMMEDIATE_VALUE) { + ex_desc.ud |= ex_desc_imm; + } else { + struct brw_reg addr = retype(brw_address_reg(2), BRW_REGISTER_TYPE_UD); + + brw_push_insn_state(p); + brw_set_default_access_mode(p, BRW_ALIGN_1); + brw_set_default_mask_control(p, BRW_MASK_DISABLE); + brw_set_default_exec_size(p, BRW_EXECUTE_1); + brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); + + /* Load the indirect extended descriptor to an address register using OR + * so the caller can specify additional descriptor bits with the + * ex_desc_imm immediate. + * + * Even though the instruction dispatcher always pulls the SFID and EOT + * fields from the instruction itself, the actual external unit which + * processes the message gets the SFID and EOT from the extended + * descriptor which comes from the address register. If we don't OR + * those two bits in, the external unit may get confused and hang. 
+ */ + brw_OR(p, addr, ex_desc, brw_imm_ud(ex_desc_imm | sfid | eot << 5)); + + brw_pop_insn_state(p); + ex_desc = addr; + } + + send = next_insn(p, BRW_OPCODE_SENDS); + brw_set_dest(p, send, dst); + brw_set_src0(p, send, retype(payload0, BRW_REGISTER_TYPE_UD)); + brw_set_src1(p, send, retype(payload1, BRW_REGISTER_TYPE_UD)); + + if (desc.file == BRW_IMMEDIATE_VALUE) { + brw_inst_set_send_sel_reg32_desc(devinfo, send, 0); + brw_inst_set_send_desc(devinfo, send, desc.ud); + } else { + assert(desc.file == BRW_ARCHITECTURE_REGISTER_FILE); + assert(desc.nr == BRW_ARF_ADDRESS); + assert(desc.subnr == 0); + brw_inst_set_send_sel_reg32_desc(devinfo, send, 1); + } + + if (ex_desc.file == BRW_IMMEDIATE_VALUE) { + brw_inst_set_send_sel_reg32_ex_desc(devinfo, send, 0); + brw_inst_set_send_ex_desc(devinfo, send, ex_desc.ud); + } else { + assert(ex_desc.file == BRW_ARCHITECTURE_REGISTER_FILE); + assert(ex_desc.nr == BRW_ARF_ADDRESS); + assert((ex_desc.subnr & 0x3) == 0); + brw_inst_set_send_sel_reg32_ex_desc(devinfo, send, 1); + brw_inst_set_send_ex_desc_ia_subreg_nr(devinfo, send, ex_desc.subnr >> 2); + } + + brw_inst_set_sfid(devinfo, send, sfid); + brw_inst_set_eot(devinfo, send, eot); } static void @@ -2514,7 +2679,7 @@ brw_send_indirect_surface_message(struct brw_codegen *p, surface = addr; } - brw_send_indirect_message(p, sfid, dst, payload, surface, desc_imm); + brw_send_indirect_message(p, sfid, dst, payload, surface, desc_imm, false); } static bool @@ -2841,95 +3006,6 @@ brw_untyped_surface_write(struct brw_codegen *p, payload, surface, desc); } -void -brw_typed_atomic(struct brw_codegen *p, - struct brw_reg dst, - struct brw_reg payload, - struct brw_reg surface, - unsigned atomic_op, - unsigned msg_length, - bool response_expected, - bool header_present) { - const struct gen_device_info *devinfo = p->devinfo; - const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ? 
- HSW_SFID_DATAPORT_DATA_CACHE_1 : - GEN6_SFID_DATAPORT_RENDER_CACHE); - const bool align1 = brw_get_default_access_mode(p) == BRW_ALIGN_1; - /* SIMD4x2 typed atomic instructions only exist on HSW+ */ - const bool has_simd4x2 = devinfo->gen >= 8 || devinfo->is_haswell; - const unsigned exec_size = align1 ? 1 << brw_get_default_exec_size(p) : - has_simd4x2 ? 0 : 8; - /* Typed atomics don't support SIMD16 */ - assert(exec_size <= 8); - const unsigned response_length = - brw_surface_payload_size(p, response_expected, exec_size); - const unsigned desc = - brw_message_desc(devinfo, msg_length, response_length, header_present) | - brw_dp_typed_atomic_desc(devinfo, exec_size, brw_get_default_group(p), - atomic_op, response_expected); - /* Mask out unused components -- See comment in brw_untyped_atomic(). */ - const unsigned mask = align1 ? WRITEMASK_XYZW : WRITEMASK_X; - - brw_send_indirect_surface_message(p, sfid, brw_writemask(dst, mask), - payload, surface, desc); -} - -void -brw_typed_surface_read(struct brw_codegen *p, - struct brw_reg dst, - struct brw_reg payload, - struct brw_reg surface, - unsigned msg_length, - unsigned num_channels, - bool header_present) -{ - const struct gen_device_info *devinfo = p->devinfo; - const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ? - HSW_SFID_DATAPORT_DATA_CACHE_1 : - GEN6_SFID_DATAPORT_RENDER_CACHE); - const bool align1 = brw_get_default_access_mode(p) == BRW_ALIGN_1; - /* SIMD4x2 typed read instructions only exist on HSW+ */ - const bool has_simd4x2 = devinfo->gen >= 8 || devinfo->is_haswell; - const unsigned exec_size = align1 ? 1 << brw_get_default_exec_size(p) : - has_simd4x2 ? 
0 : 8; - const unsigned response_length = - brw_surface_payload_size(p, num_channels, exec_size); - const unsigned desc = - brw_message_desc(devinfo, msg_length, response_length, header_present) | - brw_dp_typed_surface_rw_desc(devinfo, exec_size, brw_get_default_group(p), - num_channels, false); - - brw_send_indirect_surface_message(p, sfid, dst, payload, surface, desc); -} - -void -brw_typed_surface_write(struct brw_codegen *p, - struct brw_reg payload, - struct brw_reg surface, - unsigned msg_length, - unsigned num_channels, - bool header_present) -{ - const struct gen_device_info *devinfo = p->devinfo; - const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ? - HSW_SFID_DATAPORT_DATA_CACHE_1 : - GEN6_SFID_DATAPORT_RENDER_CACHE); - const bool align1 = brw_get_default_access_mode(p) == BRW_ALIGN_1; - /* SIMD4x2 typed read instructions only exist on HSW+ */ - const bool has_simd4x2 = devinfo->gen >= 8 || devinfo->is_haswell; - const unsigned exec_size = align1 ? 1 << brw_get_default_exec_size(p) : - has_simd4x2 ? 0 : 8; - const unsigned desc = - brw_message_desc(devinfo, msg_length, 0, header_present) | - brw_dp_typed_surface_rw_desc(devinfo, exec_size, brw_get_default_group(p), - num_channels, true); - /* Mask out unused components -- See comment in brw_untyped_atomic(). */ - const unsigned mask = !has_simd4x2 && !align1 ? 
WRITEMASK_X : WRITEMASK_XYZW; - - brw_send_indirect_surface_message(p, sfid, brw_writemask(brw_null_reg(), mask), - payload, surface, desc); -} - static void brw_set_memory_fence_message(struct brw_codegen *p, struct brw_inst *insn, @@ -2961,10 +3037,12 @@ brw_set_memory_fence_message(struct brw_codegen *p, void brw_memory_fence(struct brw_codegen *p, struct brw_reg dst, - enum opcode send_op) + struct brw_reg src, + enum opcode send_op, + bool stall) { const struct gen_device_info *devinfo = p->devinfo; - const bool commit_enable = + const bool commit_enable = stall || devinfo->gen >= 10 || /* HSD ES # 1404612949 */ (devinfo->gen == 7 && !devinfo->is_haswell); struct brw_inst *insn; @@ -2972,15 +3050,15 @@ brw_memory_fence(struct brw_codegen *p, brw_push_insn_state(p); brw_set_default_mask_control(p, BRW_MASK_DISABLE); brw_set_default_exec_size(p, BRW_EXECUTE_1); - dst = vec1(dst); + dst = retype(vec1(dst), BRW_REGISTER_TYPE_UW); + src = retype(vec1(src), BRW_REGISTER_TYPE_UD); /* Set dst as destination for dependency tracking, the MEMORY_FENCE * message doesn't write anything back. */ insn = next_insn(p, send_op); - dst = retype(dst, BRW_REGISTER_TYPE_UW); brw_set_dest(p, insn, dst); - brw_set_src0(p, insn, dst); + brw_set_src0(p, insn, src); brw_set_memory_fence_message(p, insn, GEN7_SFID_DATAPORT_DATA_CACHE, commit_enable); @@ -2991,7 +3069,7 @@ brw_memory_fence(struct brw_codegen *p, */ insn = next_insn(p, send_op); brw_set_dest(p, insn, offset(dst, 1)); - brw_set_src0(p, insn, offset(dst, 1)); + brw_set_src0(p, insn, src); brw_set_memory_fence_message(p, insn, GEN6_SFID_DATAPORT_RENDER_CACHE, commit_enable); @@ -3003,6 +3081,9 @@ brw_memory_fence(struct brw_codegen *p, brw_MOV(p, dst, offset(dst, 1)); } + if (stall) + brw_MOV(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UW), dst); + brw_pop_insn_state(p); } @@ -3033,7 +3114,8 @@ brw_pixel_interpolator_query(struct brw_codegen *p, dest, mrf, vec1(data), - desc); + desc, + false); } void