X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fintel%2Fcompiler%2Fbrw_eu_emit.c;h=0d5c755f9e8d36e9ee1b7db70af4b2b63fc9aed6;hb=c883c482be2ab4ab65e4905a447a0edf3e18418b;hp=44577de62e8a143a9c63de0acd7a96efe1a9b904;hpb=ca94717035f695fd25dd07bb16167c12520d86c3;p=mesa.git diff --git a/src/intel/compiler/brw_eu_emit.c b/src/intel/compiler/brw_eu_emit.c index 44577de62e8..0d5c755f9e8 100644 --- a/src/intel/compiler/brw_eu_emit.c +++ b/src/intel/compiler/brw_eu_emit.c @@ -237,7 +237,7 @@ brw_set_src0(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg) assert(reg.file != BRW_IMMEDIATE_VALUE); assert(reg.address_mode == BRW_ADDRESS_DIRECT); assert(reg.subnr == 0); - assert(brw_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1 || + assert(has_scalar_region(reg) || (reg.hstride == BRW_HORIZONTAL_STRIDE_1 && reg.vstride == reg.width + 1)); assert(!reg.negate && !reg.abs); @@ -249,8 +249,9 @@ brw_set_src0(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg) assert(reg.file == BRW_GENERAL_REGISTER_FILE); assert(reg.address_mode == BRW_ADDRESS_DIRECT); assert(reg.subnr % 16 == 0); - assert(reg.hstride == BRW_HORIZONTAL_STRIDE_1 && - reg.vstride == reg.width + 1); + assert(has_scalar_region(reg) || + (reg.hstride == BRW_HORIZONTAL_STRIDE_1 && + reg.vstride == reg.width + 1)); assert(!reg.negate && !reg.abs); brw_inst_set_src0_da_reg_nr(devinfo, inst, reg.nr); brw_inst_set_src0_da16_subreg_nr(devinfo, inst, reg.subnr / 16); @@ -357,7 +358,7 @@ brw_set_src1(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg) reg.file == BRW_ARCHITECTURE_REGISTER_FILE); assert(reg.address_mode == BRW_ADDRESS_DIRECT); assert(reg.subnr == 0); - assert(brw_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1 || + assert(has_scalar_region(reg) || (reg.hstride == BRW_HORIZONTAL_STRIDE_1 && reg.vstride == reg.width + 1)); assert(!reg.negate && !reg.abs); @@ -593,7 +594,7 @@ gen7_set_dp_scratch_message(struct brw_codegen *p, const struct gen_device_info *devinfo = p->devinfo; 
assert(num_regs == 1 || num_regs == 2 || num_regs == 4 || (devinfo->gen >= 8 && num_regs == 8)); - const unsigned block_size = (devinfo->gen >= 8 ? _mesa_logbase2(num_regs) : + const unsigned block_size = (devinfo->gen >= 8 ? util_logbase2(num_regs) : num_regs - 1); brw_set_desc(p, inst, brw_message_desc( @@ -890,7 +891,7 @@ brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest, dest.file == BRW_MESSAGE_REGISTER_FILE); } brw_inst_set_3src_dst_reg_nr(devinfo, inst, dest.nr); - brw_inst_set_3src_a16_dst_subreg_nr(devinfo, inst, dest.subnr / 16); + brw_inst_set_3src_a16_dst_subreg_nr(devinfo, inst, dest.subnr / 4); brw_inst_set_3src_a16_dst_writemask(devinfo, inst, dest.writemask); assert(src0.file == BRW_GENERAL_REGISTER_FILE); @@ -1020,33 +1021,6 @@ brw_inst *brw_##OP(struct brw_codegen *p, \ return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2); \ } -/* Rounding operations (other than RNDD) require two instructions - the first - * stores a rounded value (possibly the wrong way) in the dest register, but - * also sets a per-channel "increment bit" in the flag register. A predicated - * add of 1.0 fixes dest to contain the desired result. - * - * Sandybridge and later appear to round correctly without an ADD. 
- */ -#define ROUND(OP) \ -void brw_##OP(struct brw_codegen *p, \ - struct brw_reg dest, \ - struct brw_reg src) \ -{ \ - const struct gen_device_info *devinfo = p->devinfo; \ - brw_inst *rnd, *add; \ - rnd = next_insn(p, BRW_OPCODE_##OP); \ - brw_set_dest(p, rnd, dest); \ - brw_set_src0(p, rnd, src); \ - \ - if (devinfo->gen < 6) { \ - /* turn on round-increments */ \ - brw_inst_set_cond_modifier(devinfo, rnd, BRW_CONDITIONAL_R); \ - add = brw_ADD(p, dest, dest, brw_imm_f(1.0f)); \ - brw_inst_set_pred_control(devinfo, add, BRW_PREDICATE_NORMAL); \ - } \ -} - - ALU2(SEL) ALU1(NOT) ALU2(AND) @@ -1061,6 +1035,9 @@ ALU2(ROR) ALU3(CSEL) ALU1(FRC) ALU1(RNDD) +ALU1(RNDE) +ALU1(RNDU) +ALU1(RNDZ) ALU2(MAC) ALU2(MACH) ALU1(LZD) @@ -1080,9 +1057,6 @@ ALU1(CBIT) ALU2(ADDC) ALU2(SUBB) -ROUND(RNDZ) -ROUND(RNDE) - brw_inst * brw_MOV(struct brw_codegen *p, struct brw_reg dest, struct brw_reg src0) { @@ -1753,14 +1727,23 @@ brw_CONT(struct brw_codegen *p) } brw_inst * -gen6_HALT(struct brw_codegen *p) +brw_HALT(struct brw_codegen *p) { const struct gen_device_info *devinfo = p->devinfo; brw_inst *insn; insn = next_insn(p, BRW_OPCODE_HALT); brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); - if (devinfo->gen < 8) { + if (devinfo->gen < 6) { + /* From the Gen4 PRM: + * + * "IP register must be put (for example, by the assembler) at <dst> + * and <src0> locations. + */ + brw_set_dest(p, insn, brw_ip_reg()); + brw_set_src0(p, insn, brw_ip_reg()); + brw_set_src1(p, insn, brw_imm_d(0x0)); /* exitcode updated later. */ + } else if (devinfo->gen < 8) { brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); brw_set_src1(p, insn, brw_imm_d(0x0)); /* UIP and JIP, updated later. 
*/ } else if (devinfo->gen < 12) { @@ -2705,7 +2688,7 @@ brw_send_indirect_split_message(struct brw_codegen *p, } if (ex_desc.file == BRW_IMMEDIATE_VALUE && - (ex_desc.ud & INTEL_MASK(15, 12)) == 0) { + (devinfo->gen >= 12 || (ex_desc.ud & INTEL_MASK(15, 12)) == 0)) { ex_desc.ud |= ex_desc_imm; } else { const struct tgl_swsb swsb = brw_get_default_swsb(p); @@ -2731,8 +2714,9 @@ brw_send_indirect_split_message(struct brw_codegen *p, unsigned imm_part = ex_desc_imm | sfid | eot << 5; if (ex_desc.file == BRW_IMMEDIATE_VALUE) { - /* ex_desc bits 15:12 don't exist in the instruction encoding, so - * we may have fallen back to an indirect extended descriptor. + /* ex_desc bits 15:12 don't exist in the instruction encoding prior + * to Gen12, so we may have fallen back to an indirect extended + * descriptor. */ brw_MOV(p, addr, brw_imm_ud(ex_desc.ud | imm_part)); } else { @@ -3177,57 +3161,24 @@ brw_memory_fence(struct brw_codegen *p, struct brw_reg dst, struct brw_reg src, enum opcode send_op, - bool stall, + enum brw_message_target sfid, + bool commit_enable, unsigned bti) { const struct gen_device_info *devinfo = p->devinfo; - const bool commit_enable = stall || - devinfo->gen >= 10 || /* HSD ES # 1404612949 */ - (devinfo->gen == 7 && !devinfo->is_haswell); - struct brw_inst *insn; - brw_push_insn_state(p); - brw_set_default_mask_control(p, BRW_MASK_DISABLE); - brw_set_default_exec_size(p, BRW_EXECUTE_1); dst = retype(vec1(dst), BRW_REGISTER_TYPE_UW); src = retype(vec1(src), BRW_REGISTER_TYPE_UD); /* Set dst as destination for dependency tracking, the MEMORY_FENCE * message doesn't write anything back. 
*/ - insn = next_insn(p, send_op); + struct brw_inst *insn = next_insn(p, send_op); + brw_inst_set_mask_control(devinfo, insn, BRW_MASK_DISABLE); + brw_inst_set_exec_size(devinfo, insn, BRW_EXECUTE_1); brw_set_dest(p, insn, dst); brw_set_src0(p, insn, src); - brw_set_memory_fence_message(p, insn, GEN7_SFID_DATAPORT_DATA_CACHE, - commit_enable, bti); - - if (devinfo->gen == 7 && !devinfo->is_haswell) { - /* IVB does typed surface access through the render cache, so we need to - * flush it too. Use a different register so both flushes can be - * pipelined by the hardware. - */ - insn = next_insn(p, send_op); - brw_set_dest(p, insn, offset(dst, 1)); - brw_set_src0(p, insn, src); - brw_set_memory_fence_message(p, insn, GEN6_SFID_DATAPORT_RENDER_CACHE, - commit_enable, bti); - - /* Now write the response of the second message into the response of the - * first to trigger a pipeline stall -- This way future render and data - * cache messages will be properly ordered with respect to past data and - * render cache messages. - */ - brw_MOV(p, dst, offset(dst, 1)); - } - - if (stall) { - brw_set_default_swsb(p, tgl_swsb_sbid(TGL_SBID_DST, - brw_get_default_swsb(p).sbid)); - - brw_MOV(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UW), dst); - } - - brw_pop_insn_state(p); + brw_set_memory_fence_message(p, insn, sfid, commit_enable, bti); } void @@ -3442,7 +3393,7 @@ brw_broadcast(struct brw_codegen *p, /* Take into account the component size and horizontal stride. */ assert(src.vstride == src.hstride + src.width); brw_SHL(p, addr, vec1(idx), - brw_imm_ud(_mesa_logbase2(type_sz(src.type)) + + brw_imm_ud(util_logbase2(type_sz(src.type)) + src.hstride - 1)); /* We can only address up to limit bytes using the indirect