X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fi965%2Fbrw_fs_cse.cpp;h=48220efd73040bad690afb28038319aebaafccfd;hb=ed65e6ef49e17e9cae93a8f98e2968346de2bc6e;hp=e33fe6a98024b512b96f0d44cf9586da1aff72e6;hpb=dabec9c293ee29335f5a6d5d1d3c2b7a715605c1;p=mesa.git diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp index e33fe6a9802..48220efd730 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp @@ -61,6 +61,7 @@ is_expression(const fs_visitor *v, const fs_inst *const inst) case BRW_OPCODE_CMPN: case BRW_OPCODE_ADD: case BRW_OPCODE_MUL: + case SHADER_OPCODE_MULH: case BRW_OPCODE_FRC: case BRW_OPCODE_RNDU: case BRW_OPCODE_RNDD: @@ -70,13 +71,29 @@ is_expression(const fs_visitor *v, const fs_inst *const inst) case BRW_OPCODE_PLN: case BRW_OPCODE_MAD: case BRW_OPCODE_LRP: + case FS_OPCODE_FB_READ_LOGICAL: case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD: + case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL: case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7: - case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD: case FS_OPCODE_CINTERP: case FS_OPCODE_LINTERP: case SHADER_OPCODE_FIND_LIVE_CHANNEL: case SHADER_OPCODE_BROADCAST: + case SHADER_OPCODE_MOV_INDIRECT: + case SHADER_OPCODE_TEX_LOGICAL: + case SHADER_OPCODE_TXD_LOGICAL: + case SHADER_OPCODE_TXF_LOGICAL: + case SHADER_OPCODE_TXL_LOGICAL: + case SHADER_OPCODE_TXS_LOGICAL: + case FS_OPCODE_TXB_LOGICAL: + case SHADER_OPCODE_TXF_CMS_LOGICAL: + case SHADER_OPCODE_TXF_CMS_W_LOGICAL: + case SHADER_OPCODE_TXF_UMS_LOGICAL: + case SHADER_OPCODE_TXF_MCS_LOGICAL: + case SHADER_OPCODE_LOD_LOGICAL: + case SHADER_OPCODE_TG4_LOGICAL: + case SHADER_OPCODE_TG4_OFFSET_LOGICAL: + case FS_OPCODE_PACK: return true; case SHADER_OPCODE_RCP: case SHADER_OPCODE_RSQ: @@ -92,7 +109,8 @@ is_expression(const fs_visitor *v, const fs_inst *const inst) case SHADER_OPCODE_LOAD_PAYLOAD: return !inst->is_copy_payload(v->alloc); default: - return inst->is_send_from_grf() && !inst->has_side_effects(); + return inst->is_send_from_grf() && !inst->has_side_effects() && + !inst->is_volatile(); } } @@ -108,20 +126,20 @@ operands_match(const fs_inst *a, const fs_inst *b, bool *negate) (xs[2].equals(ys[1]) && xs[1].equals(ys[2]))); } else if (a->opcode == BRW_OPCODE_MUL && a->dst.type == BRW_REGISTER_TYPE_F) { bool xs0_negate = xs[0].negate; - bool xs1_negate = xs[1].file == IMM ? xs[1].fixed_hw_reg.dw1.f < 0.0f + bool xs1_negate = xs[1].file == IMM ? xs[1].f < 0.0f : xs[1].negate; bool ys0_negate = ys[0].negate; - bool ys1_negate = ys[1].file == IMM ? ys[1].fixed_hw_reg.dw1.f < 0.0f + bool ys1_negate = ys[1].file == IMM ? ys[1].f < 0.0f : ys[1].negate; - float xs1_imm = xs[1].fixed_hw_reg.dw1.f; - float ys1_imm = ys[1].fixed_hw_reg.dw1.f; + float xs1_imm = xs[1].f; + float ys1_imm = ys[1].f; xs[0].negate = false; xs[1].negate = false; ys[0].negate = false; ys[1].negate = false; - xs[1].fixed_hw_reg.dw1.f = fabsf(xs[1].fixed_hw_reg.dw1.f); - ys[1].fixed_hw_reg.dw1.f = fabsf(ys[1].fixed_hw_reg.dw1.f); + xs[1].f = fabsf(xs[1].f); + ys[1].f = fabsf(ys[1].f); bool ret = (xs[0].equals(ys[0]) && xs[1].equals(ys[1])) || (xs[1].equals(ys[0]) && xs[0].equals(ys[1])); @@ -130,10 +148,12 @@ operands_match(const fs_inst *a, const fs_inst *b, bool *negate) xs[1].negate = xs[1].file == IMM ? false : xs1_negate; ys[0].negate = ys0_negate; ys[1].negate = ys[1].file == IMM ? false : ys1_negate; - xs[1].fixed_hw_reg.dw1.f = xs1_imm; - ys[1].fixed_hw_reg.dw1.f = ys1_imm; + xs[1].f = xs1_imm; + ys[1].f = ys1_imm; *negate = (xs0_negate != xs1_negate) != (ys0_negate != ys1_negate); + if (*negate && (a->saturate || b->saturate)) + return false; return ret; } else if (!a->is_commutative()) { bool match = true; @@ -156,7 +176,7 @@ instructions_match(fs_inst *a, fs_inst *b, bool *negate) return a->opcode == b->opcode && a->force_writemask_all == b->force_writemask_all && a->exec_size == b->exec_size && - a->force_sechalf == b->force_sechalf && + a->group == b->group && a->saturate == b->saturate && a->predicate == b->predicate && a->predicate_inverse == b->predicate_inverse && @@ -165,12 +185,13 @@ instructions_match(fs_inst *a, fs_inst *b, bool *negate) a->dst.type == b->dst.type && a->offset == b->offset && a->mlen == b->mlen && - a->regs_written == b->regs_written && + a->size_written == b->size_written && a->base_mrf == b->base_mrf && a->eot == b->eot && a->header_size == b->header_size && a->shadow_compare == b->shadow_compare && a->pi_noperspective == b->pi_noperspective && + a->target == b->target && a->sources == b->sources && operands_match(a, b, negate); } @@ -178,13 +199,13 @@ instructions_match(fs_inst *a, fs_inst *b, bool *negate) static void create_copy_instr(const fs_builder &bld, fs_inst *inst, fs_reg src, bool negate) { - int written = inst->regs_written; - int dst_width = inst->exec_size / 8; - const fs_builder ubld = bld.exec_all(inst->force_writemask_all) - .group(inst->exec_size, inst->force_sechalf); + unsigned written = regs_written(inst); + unsigned dst_width = + DIV_ROUND_UP(inst->dst.component_size(inst->exec_size), REG_SIZE); fs_inst *copy; - if (written > dst_width) { + if (inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD || + written != dst_width) { fs_reg *payload; int sources, header_size; if (inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD) { @@ -196,22 +217,24 @@ create_copy_instr(const fs_builder &bld, fs_inst *inst, fs_reg src, bool negate) header_size = 0; } - assert(src.file == GRF); + assert(src.file == VGRF); payload = ralloc_array(bld.shader->mem_ctx, fs_reg, sources); for (int i = 0; i < header_size; i++) { payload[i] = src; - src.reg_offset++; + src.offset += REG_SIZE; } for (int i = header_size; i < sources; i++) { payload[i] = src; - src = offset(src, ubld, 1); + src = offset(src, bld, 1); } - copy = ubld.LOAD_PAYLOAD(inst->dst, payload, sources, header_size); + copy = bld.LOAD_PAYLOAD(inst->dst, payload, sources, header_size); } else { - copy = ubld.MOV(inst->dst, src); + copy = bld.MOV(inst->dst, src); + copy->group = inst->group; + copy->force_writemask_all = inst->force_writemask_all; copy->src[0].negate = negate; } - assert(copy->regs_written == written); + assert(regs_written(copy) == written); } bool @@ -226,7 +249,8 @@ fs_visitor::opt_cse_local(bblock_t *block) foreach_inst_in_block(fs_inst, inst, block) { /* Skip some cases. */ if (is_expression(this, inst) && !inst->is_partial_write() && - (inst->dst.file != HW_REG || inst->dst.is_null())) + ((inst->dst.file != ARF && inst->dst.file != FIXED_GRF) || + inst->dst.is_null())) { bool found = false; bool negate = false; @@ -258,23 +282,25 @@ fs_visitor::opt_cse_local(bblock_t *block) */ bool no_existing_temp = entry->tmp.file == BAD_FILE; if (no_existing_temp && !entry->generator->dst.is_null()) { - int written = entry->generator->regs_written; + const fs_builder ibld = fs_builder(this, block, entry->generator) + .at(block, entry->generator->next); + int written = regs_written(entry->generator); - entry->tmp = fs_reg(GRF, alloc.allocate(written), + entry->tmp = fs_reg(VGRF, alloc.allocate(written), entry->generator->dst.type); - create_copy_instr(bld.at(block, entry->generator->next), - entry->generator, entry->tmp, false); + create_copy_instr(ibld, entry->generator, entry->tmp, false); entry->generator->dst = entry->tmp; } /* dest <- temp */ if (!inst->dst.is_null()) { - assert(inst->regs_written == entry->generator->regs_written); + assert(inst->size_written == entry->generator->size_written); assert(inst->dst.type == entry->tmp.type); + const fs_builder ibld(this, block, inst); - create_copy_instr(bld.at(block, inst), inst, entry->tmp, negate); + create_copy_instr(ibld, inst, entry->tmp, negate); } /* Set our iterator so that next time through the loop inst->next @@ -292,10 +318,10 @@ fs_visitor::opt_cse_local(bblock_t *block) /* Kill all AEB entries that write a different value to or read from * the flag register if we just wrote it. */ - if (inst->writes_flag()) { + if (inst->flags_written()) { bool negate; /* dummy */ - if (entry->generator->reads_flag() || - (entry->generator->writes_flag() && + if (entry->generator->flags_read(devinfo) || + (entry->generator->flags_written() && !instructions_match(inst, entry->generator, &negate))) { entry->remove(); ralloc_free(entry); @@ -309,7 +335,9 @@ fs_visitor::opt_cse_local(bblock_t *block) /* Kill all AEB entries that use the destination we just * overwrote. */ - if (inst->overwrites_reg(entry->generator->src[i])) { + if (regions_overlap(inst->dst, inst->size_written, + entry->generator->src[i], + entry->generator->size_read(i))) { entry->remove(); ralloc_free(entry); break; @@ -318,7 +346,7 @@ fs_visitor::opt_cse_local(bblock_t *block) /* Kill any AEB entries using registers that don't get reused any * more -- a sure sign they'll fail operands_match(). */ - if (src_reg->file == GRF && virtual_grf_end[src_reg->reg] < ip) { + if (src_reg->file == VGRF && virtual_grf_end[src_reg->nr] < ip) { entry->remove(); ralloc_free(entry); break;