X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fi965%2Fbrw_fs_cse.cpp;h=48220efd73040bad690afb28038319aebaafccfd;hb=ed65e6ef49e17e9cae93a8f98e2968346de2bc6e;hp=8958e62180273ce6637a4a9c54d7c4a390051b03;hpb=f2fad0dc80627e853eea558498f18a9fa769992e;p=mesa.git diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp index 8958e621802..48220efd730 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp @@ -32,6 +32,8 @@ * 13.1 (p378). */ +using namespace brw; + namespace { struct aeb_entry : public exec_node { /** The instruction that generates the expression value. */ @@ -43,23 +45,7 @@ struct aeb_entry : public exec_node { } static bool -is_copy_payload(const fs_inst *inst) -{ - const int reg = inst->src[0].reg; - if (inst->src[0].reg_offset != 0) - return false; - - for (int i = 1; i < inst->sources; i++) { - if (inst->src[i].reg != reg || - inst->src[i].reg_offset != i) { - return false; - } - } - return true; -} - -static bool -is_expression(const fs_inst *const inst) +is_expression(const fs_visitor *v, const fs_inst *const inst) { switch (inst->opcode) { case BRW_OPCODE_MOV: @@ -75,6 +61,7 @@ is_expression(const fs_inst *const inst) case BRW_OPCODE_CMPN: case BRW_OPCODE_ADD: case BRW_OPCODE_MUL: + case SHADER_OPCODE_MULH: case BRW_OPCODE_FRC: case BRW_OPCODE_RNDU: case BRW_OPCODE_RNDD: @@ -84,12 +71,29 @@ is_expression(const fs_inst *const inst) case BRW_OPCODE_PLN: case BRW_OPCODE_MAD: case BRW_OPCODE_LRP: + case FS_OPCODE_FB_READ_LOGICAL: case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD: + case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL: case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7: - case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD: case FS_OPCODE_CINTERP: case FS_OPCODE_LINTERP: + case SHADER_OPCODE_FIND_LIVE_CHANNEL: case SHADER_OPCODE_BROADCAST: + case SHADER_OPCODE_MOV_INDIRECT: + case SHADER_OPCODE_TEX_LOGICAL: + case SHADER_OPCODE_TXD_LOGICAL: + case SHADER_OPCODE_TXF_LOGICAL: + case SHADER_OPCODE_TXL_LOGICAL: + case SHADER_OPCODE_TXS_LOGICAL: + case FS_OPCODE_TXB_LOGICAL: + case SHADER_OPCODE_TXF_CMS_LOGICAL: + case SHADER_OPCODE_TXF_CMS_W_LOGICAL: + case SHADER_OPCODE_TXF_UMS_LOGICAL: + case SHADER_OPCODE_TXF_MCS_LOGICAL: + case SHADER_OPCODE_LOD_LOGICAL: + case SHADER_OPCODE_TG4_LOGICAL: + case SHADER_OPCODE_TG4_OFFSET_LOGICAL: + case FS_OPCODE_PACK: return true; case SHADER_OPCODE_RCP: case SHADER_OPCODE_RSQ: @@ -103,9 +107,10 @@ is_expression(const fs_inst *const inst) case SHADER_OPCODE_COS: return inst->mlen < 2; case SHADER_OPCODE_LOAD_PAYLOAD: - return !is_copy_payload(inst); + return !inst->is_copy_payload(v->alloc); default: - return inst->is_send_from_grf() && !inst->has_side_effects(); + return inst->is_send_from_grf() && !inst->has_side_effects() && + !inst->is_volatile(); } } @@ -121,20 +126,20 @@ operands_match(const fs_inst *a, const fs_inst *b, bool *negate) (xs[2].equals(ys[1]) && xs[1].equals(ys[2]))); } else if (a->opcode == BRW_OPCODE_MUL && a->dst.type == BRW_REGISTER_TYPE_F) { bool xs0_negate = xs[0].negate; - bool xs1_negate = xs[1].file == IMM ? xs[1].fixed_hw_reg.dw1.f < 0.0f + bool xs1_negate = xs[1].file == IMM ? xs[1].f < 0.0f : xs[1].negate; bool ys0_negate = ys[0].negate; - bool ys1_negate = ys[1].file == IMM ? ys[1].fixed_hw_reg.dw1.f < 0.0f + bool ys1_negate = ys[1].file == IMM ? ys[1].f < 0.0f : ys[1].negate; - float xs1_imm = xs[1].fixed_hw_reg.dw1.f; - float ys1_imm = ys[1].fixed_hw_reg.dw1.f; + float xs1_imm = xs[1].f; + float ys1_imm = ys[1].f; xs[0].negate = false; xs[1].negate = false; ys[0].negate = false; ys[1].negate = false; - xs[1].fixed_hw_reg.dw1.f = fabsf(xs[1].fixed_hw_reg.dw1.f); - ys[1].fixed_hw_reg.dw1.f = fabsf(ys[1].fixed_hw_reg.dw1.f); + xs[1].f = fabsf(xs[1].f); + ys[1].f = fabsf(ys[1].f); bool ret = (xs[0].equals(ys[0]) && xs[1].equals(ys[1])) || (xs[1].equals(ys[0]) && xs[0].equals(ys[1])); @@ -143,10 +148,12 @@ operands_match(const fs_inst *a, const fs_inst *b, bool *negate) xs[1].negate = xs[1].file == IMM ? false : xs1_negate; ys[0].negate = ys0_negate; ys[1].negate = ys[1].file == IMM ? false : ys1_negate; - xs[1].fixed_hw_reg.dw1.f = xs1_imm; - ys[1].fixed_hw_reg.dw1.f = ys1_imm; + xs[1].f = xs1_imm; + ys[1].f = ys1_imm; *negate = (xs0_negate != xs1_negate) != (ys0_negate != ys1_negate); + if (*negate && (a->saturate || b->saturate)) + return false; return ret; } else if (!a->is_commutative()) { bool match = true; @@ -167,23 +174,69 @@ static bool instructions_match(fs_inst *a, fs_inst *b, bool *negate) { return a->opcode == b->opcode && + a->force_writemask_all == b->force_writemask_all && + a->exec_size == b->exec_size && + a->group == b->group && a->saturate == b->saturate && a->predicate == b->predicate && a->predicate_inverse == b->predicate_inverse && a->conditional_mod == b->conditional_mod && + a->flag_subreg == b->flag_subreg && a->dst.type == b->dst.type && + a->offset == b->offset && + a->mlen == b->mlen && + a->size_written == b->size_written && + a->base_mrf == b->base_mrf && + a->eot == b->eot && + a->header_size == b->header_size && + a->shadow_compare == b->shadow_compare && + a->pi_noperspective == b->pi_noperspective && + a->target == b->target && a->sources == b->sources && - (a->is_tex() ? (a->offset == b->offset && - a->mlen == b->mlen && - a->regs_written == b->regs_written && - a->base_mrf == b->base_mrf && - a->eot == b->eot && - a->header_present == b->header_present && - a->shadow_compare == b->shadow_compare) - : true) && operands_match(a, b, negate); } +static void +create_copy_instr(const fs_builder &bld, fs_inst *inst, fs_reg src, bool negate) +{ + unsigned written = regs_written(inst); + unsigned dst_width = + DIV_ROUND_UP(inst->dst.component_size(inst->exec_size), REG_SIZE); + fs_inst *copy; + + if (inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD || + written != dst_width) { + fs_reg *payload; + int sources, header_size; + if (inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD) { + sources = inst->sources; + header_size = inst->header_size; + } else { + assert(written % dst_width == 0); + sources = written / dst_width; + header_size = 0; + } + + assert(src.file == VGRF); + payload = ralloc_array(bld.shader->mem_ctx, fs_reg, sources); + for (int i = 0; i < header_size; i++) { + payload[i] = src; + src.offset += REG_SIZE; + } + for (int i = header_size; i < sources; i++) { + payload[i] = src; + src = offset(src, bld, 1); + } + copy = bld.LOAD_PAYLOAD(inst->dst, payload, sources, header_size); + } else { + copy = bld.MOV(inst->dst, src); + copy->group = inst->group; + copy->force_writemask_all = inst->force_writemask_all; + copy->src[0].negate = negate; + } + assert(regs_written(copy) == written); +} + bool fs_visitor::opt_cse_local(bblock_t *block) { @@ -195,8 +248,9 @@ fs_visitor::opt_cse_local(bblock_t *block) int ip = block->start_ip; foreach_inst_in_block(fs_inst, inst, block) { /* Skip some cases. */ - if (is_expression(inst) && !inst->is_partial_write() && - (inst->dst.file != HW_REG || inst->dst.is_null())) + if (is_expression(this, inst) && !inst->is_partial_write() && + ((inst->dst.file != ARF && inst->dst.file != FIXED_GRF) || + inst->dst.is_null())) { bool found = false; bool negate = false; @@ -228,51 +282,25 @@ fs_visitor::opt_cse_local(bblock_t *block) */ bool no_existing_temp = entry->tmp.file == BAD_FILE; if (no_existing_temp && !entry->generator->dst.is_null()) { - int written = entry->generator->regs_written; - int dst_width = entry->generator->dst.width / 8; - assert(written % dst_width == 0); - - fs_reg orig_dst = entry->generator->dst; - fs_reg tmp = fs_reg(GRF, alloc.allocate(written), - orig_dst.type, orig_dst.width); - entry->tmp = tmp; - entry->generator->dst = tmp; - - fs_inst *copy; - if (written > dst_width) { - fs_reg *sources = ralloc_array(mem_ctx, fs_reg, written / dst_width); - for (int i = 0; i < written / dst_width; i++) - sources[i] = offset(tmp, i); - copy = LOAD_PAYLOAD(orig_dst, sources, written / dst_width); - } else { - copy = MOV(orig_dst, tmp); - copy->force_writemask_all = - entry->generator->force_writemask_all; - } - entry->generator->insert_after(block, copy); + const fs_builder ibld = fs_builder(this, block, entry->generator) + .at(block, entry->generator->next); + int written = regs_written(entry->generator); + + entry->tmp = fs_reg(VGRF, alloc.allocate(written), + entry->generator->dst.type); + + create_copy_instr(ibld, entry->generator, entry->tmp, false); + + entry->generator->dst = entry->tmp; } /* dest <- temp */ if (!inst->dst.is_null()) { - int written = inst->regs_written; - int dst_width = inst->dst.width / 8; - assert(written == entry->generator->regs_written); - assert(dst_width == entry->generator->dst.width / 8); + assert(inst->size_written == entry->generator->size_written); assert(inst->dst.type == entry->tmp.type); - fs_reg dst = inst->dst; - fs_reg tmp = entry->tmp; - fs_inst *copy; - if (written > dst_width) { - fs_reg *sources = ralloc_array(mem_ctx, fs_reg, written / dst_width); - for (int i = 0; i < written / dst_width; i++) - sources[i] = offset(tmp, i); - copy = LOAD_PAYLOAD(dst, sources, written / dst_width); - } else { - copy = MOV(dst, tmp); - copy->force_writemask_all = inst->force_writemask_all; - copy->src[0].negate = negate; - } - inst->insert_before(block, copy); + const fs_builder ibld(this, block, inst); + + create_copy_instr(ibld, inst, entry->tmp, negate); } /* Set our iterator so that next time through the loop inst->next @@ -290,10 +318,10 @@ fs_visitor::opt_cse_local(bblock_t *block) /* Kill all AEB entries that write a different value to or read from * the flag register if we just wrote it. */ - if (inst->writes_flag()) { + if (inst->flags_written()) { bool negate; /* dummy */ - if (entry->generator->reads_flag() || - (entry->generator->writes_flag() && + if (entry->generator->flags_read(devinfo) || + (entry->generator->flags_written() && !instructions_match(inst, entry->generator, &negate))) { entry->remove(); ralloc_free(entry); @@ -307,7 +335,9 @@ fs_visitor::opt_cse_local(bblock_t *block) /* Kill all AEB entries that use the destination we just * overwrote. */ - if (inst->overwrites_reg(entry->generator->src[i])) { + if (regions_overlap(inst->dst, inst->size_written, + entry->generator->src[i], + entry->generator->size_read(i))) { entry->remove(); ralloc_free(entry); break; @@ -316,7 +346,7 @@ fs_visitor::opt_cse_local(bblock_t *block) /* Kill any AEB entries using registers that don't get reused any * more -- a sure sign they'll fail operands_match(). */ - if (src_reg->file == GRF && virtual_grf_end[src_reg->reg] < ip) { + if (src_reg->file == VGRF && virtual_grf_end[src_reg->nr] < ip) { entry->remove(); ralloc_free(entry); break;