X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fi965%2Fbrw_fs_cse.cpp;h=48220efd73040bad690afb28038319aebaafccfd;hb=ed65e6ef49e17e9cae93a8f98e2968346de2bc6e;hp=38fae1781142f2adacc4c2e06af651475e949fb2;hpb=a28ad9d4c0d4b95aee8c3b99e9aaa59add21ea9d;p=mesa.git diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp index 38fae178114..48220efd730 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp @@ -32,6 +32,8 @@ * 13.1 (p378). */ +using namespace brw; + namespace { struct aeb_entry : public exec_node { /** The instruction that generates the expression value. */ @@ -43,23 +45,7 @@ struct aeb_entry : public exec_node { } static bool -is_copy_payload(const fs_inst *inst) -{ - const int reg = inst->src[0].reg; - if (inst->src[0].reg_offset != 0) - return false; - - for (int i = 1; i < inst->sources; i++) { - if (inst->src[i].reg != reg || - inst->src[i].reg_offset != i) { - return false; - } - } - return true; -} - -static bool -is_expression(const fs_inst *const inst) +is_expression(const fs_visitor *v, const fs_inst *const inst) { switch (inst->opcode) { case BRW_OPCODE_MOV: @@ -75,6 +61,7 @@ is_expression(const fs_inst *const inst) case BRW_OPCODE_CMPN: case BRW_OPCODE_ADD: case BRW_OPCODE_MUL: + case SHADER_OPCODE_MULH: case BRW_OPCODE_FRC: case BRW_OPCODE_RNDU: case BRW_OPCODE_RNDD: @@ -84,11 +71,29 @@ is_expression(const fs_inst *const inst) case BRW_OPCODE_PLN: case BRW_OPCODE_MAD: case BRW_OPCODE_LRP: + case FS_OPCODE_FB_READ_LOGICAL: case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD: + case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL: case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7: - case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD: case FS_OPCODE_CINTERP: case FS_OPCODE_LINTERP: + case SHADER_OPCODE_FIND_LIVE_CHANNEL: + case SHADER_OPCODE_BROADCAST: + case SHADER_OPCODE_MOV_INDIRECT: + case SHADER_OPCODE_TEX_LOGICAL: + case SHADER_OPCODE_TXD_LOGICAL: + case SHADER_OPCODE_TXF_LOGICAL: + case SHADER_OPCODE_TXL_LOGICAL: + case SHADER_OPCODE_TXS_LOGICAL: + case FS_OPCODE_TXB_LOGICAL: + case SHADER_OPCODE_TXF_CMS_LOGICAL: + case SHADER_OPCODE_TXF_CMS_W_LOGICAL: + case SHADER_OPCODE_TXF_UMS_LOGICAL: + case SHADER_OPCODE_TXF_MCS_LOGICAL: + case SHADER_OPCODE_LOD_LOGICAL: + case SHADER_OPCODE_TG4_LOGICAL: + case SHADER_OPCODE_TG4_OFFSET_LOGICAL: + case FS_OPCODE_PACK: return true; case SHADER_OPCODE_RCP: case SHADER_OPCODE_RSQ: @@ -102,29 +107,15 @@ is_expression(const fs_inst *const inst) case SHADER_OPCODE_COS: return inst->mlen < 2; case SHADER_OPCODE_LOAD_PAYLOAD: - return !is_copy_payload(inst); + return !inst->is_copy_payload(v->alloc); default: - return inst->is_send_from_grf() && !inst->has_side_effects(); + return inst->is_send_from_grf() && !inst->has_side_effects() && + !inst->is_volatile(); } } static bool -is_expression_commutative(enum opcode op) -{ - switch (op) { - case BRW_OPCODE_AND: - case BRW_OPCODE_OR: - case BRW_OPCODE_XOR: - case BRW_OPCODE_ADD: - case BRW_OPCODE_MUL: - return true; - default: - return false; - } -} - -static bool -operands_match(fs_inst *a, fs_inst *b) +operands_match(const fs_inst *a, const fs_inst *b, bool *negate) { fs_reg *xs = a->src; fs_reg *ys = b->src; @@ -133,7 +124,38 @@ operands_match(fs_inst *a, fs_inst *b) return xs[0].equals(ys[0]) && ((xs[1].equals(ys[1]) && xs[2].equals(ys[2])) || (xs[2].equals(ys[1]) && xs[1].equals(ys[2]))); - } else if (!is_expression_commutative(a->opcode)) { + } else if (a->opcode == BRW_OPCODE_MUL && a->dst.type == BRW_REGISTER_TYPE_F) { + bool xs0_negate = xs[0].negate; + bool xs1_negate = xs[1].file == IMM ? xs[1].f < 0.0f + : xs[1].negate; + bool ys0_negate = ys[0].negate; + bool ys1_negate = ys[1].file == IMM ? ys[1].f < 0.0f + : ys[1].negate; + float xs1_imm = xs[1].f; + float ys1_imm = ys[1].f; + + xs[0].negate = false; + xs[1].negate = false; + ys[0].negate = false; + ys[1].negate = false; + xs[1].f = fabsf(xs[1].f); + ys[1].f = fabsf(ys[1].f); + + bool ret = (xs[0].equals(ys[0]) && xs[1].equals(ys[1])) || + (xs[1].equals(ys[0]) && xs[0].equals(ys[1])); + + xs[0].negate = xs0_negate; + xs[1].negate = xs[1].file == IMM ? false : xs1_negate; + ys[0].negate = ys0_negate; + ys[1].negate = ys[1].file == IMM ? false : ys1_negate; + xs[1].f = xs1_imm; + ys[1].f = ys1_imm; + + *negate = (xs0_negate != xs1_negate) != (ys0_negate != ys1_negate); + if (*negate && (a->saturate || b->saturate)) + return false; + return ret; + } else if (!a->is_commutative()) { bool match = true; for (int i = 0; i < a->sources; i++) { if (!xs[i].equals(ys[i])) { @@ -149,24 +171,70 @@ operands_match(fs_inst *a, fs_inst *b) } static bool -instructions_match(fs_inst *a, fs_inst *b) +instructions_match(fs_inst *a, fs_inst *b, bool *negate) { return a->opcode == b->opcode && + a->force_writemask_all == b->force_writemask_all && + a->exec_size == b->exec_size && + a->group == b->group && a->saturate == b->saturate && a->predicate == b->predicate && a->predicate_inverse == b->predicate_inverse && a->conditional_mod == b->conditional_mod && + a->flag_subreg == b->flag_subreg && a->dst.type == b->dst.type && + a->offset == b->offset && + a->mlen == b->mlen && + a->size_written == b->size_written && + a->base_mrf == b->base_mrf && + a->eot == b->eot && + a->header_size == b->header_size && + a->shadow_compare == b->shadow_compare && + a->pi_noperspective == b->pi_noperspective && + a->target == b->target && a->sources == b->sources && - (a->is_tex() ? (a->offset == b->offset && - a->mlen == b->mlen && - a->regs_written == b->regs_written && - a->base_mrf == b->base_mrf && - a->eot == b->eot && - a->header_present == b->header_present && - a->shadow_compare == b->shadow_compare) - : true) && - operands_match(a, b); + operands_match(a, b, negate); +} + +static void +create_copy_instr(const fs_builder &bld, fs_inst *inst, fs_reg src, bool negate) +{ + unsigned written = regs_written(inst); + unsigned dst_width = + DIV_ROUND_UP(inst->dst.component_size(inst->exec_size), REG_SIZE); + fs_inst *copy; + + if (inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD || + written != dst_width) { + fs_reg *payload; + int sources, header_size; + if (inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD) { + sources = inst->sources; + header_size = inst->header_size; + } else { + assert(written % dst_width == 0); + sources = written / dst_width; + header_size = 0; + } + + assert(src.file == VGRF); + payload = ralloc_array(bld.shader->mem_ctx, fs_reg, sources); + for (int i = 0; i < header_size; i++) { + payload[i] = src; + src.offset += REG_SIZE; + } + for (int i = header_size; i < sources; i++) { + payload[i] = src; + src = offset(src, bld, 1); + } + copy = bld.LOAD_PAYLOAD(inst->dst, payload, sources, header_size); + } else { + copy = bld.MOV(inst->dst, src); + copy->group = inst->group; + copy->force_writemask_all = inst->force_writemask_all; + copy->src[0].negate = negate; + } + assert(regs_written(copy) == written); } bool @@ -180,14 +248,17 @@ fs_visitor::opt_cse_local(bblock_t *block) int ip = block->start_ip; foreach_inst_in_block(fs_inst, inst, block) { /* Skip some cases. */ - if (is_expression(inst) && !inst->is_partial_write() && - (inst->dst.file != HW_REG || inst->dst.is_null())) + if (is_expression(this, inst) && !inst->is_partial_write() && + ((inst->dst.file != ARF && inst->dst.file != FIXED_GRF) || + inst->dst.is_null())) { bool found = false; + bool negate = false; foreach_in_list_use_after(aeb_entry, entry, &aeb) { /* Match current instruction's expression against those in AEB. */ - if (instructions_match(inst, entry->generator)) { + if (!(entry->generator->dst.is_null() && !inst->dst.is_null()) && + instructions_match(inst, entry->generator, &negate)) { found = true; progress = true; break; @@ -211,50 +282,25 @@ fs_visitor::opt_cse_local(bblock_t *block) */ bool no_existing_temp = entry->tmp.file == BAD_FILE; if (no_existing_temp && !entry->generator->dst.is_null()) { - int written = entry->generator->regs_written; - int dst_width = entry->generator->dst.width / 8; - assert(written % dst_width == 0); - - fs_reg orig_dst = entry->generator->dst; - fs_reg tmp = fs_reg(GRF, virtual_grf_alloc(written), - orig_dst.type, orig_dst.width); - entry->tmp = tmp; - entry->generator->dst = tmp; - - fs_inst *copy; - if (written > dst_width) { - fs_reg *sources = ralloc_array(mem_ctx, fs_reg, written / dst_width); - for (int i = 0; i < written / dst_width; i++) - sources[i] = offset(tmp, i); - copy = LOAD_PAYLOAD(orig_dst, sources, written / dst_width); - } else { - copy = MOV(orig_dst, tmp); - copy->force_writemask_all = - entry->generator->force_writemask_all; - } - entry->generator->insert_after(block, copy); + const fs_builder ibld = fs_builder(this, block, entry->generator) + .at(block, entry->generator->next); + int written = regs_written(entry->generator); + + entry->tmp = fs_reg(VGRF, alloc.allocate(written), + entry->generator->dst.type); + + create_copy_instr(ibld, entry->generator, entry->tmp, false); + + entry->generator->dst = entry->tmp; } /* dest <- temp */ if (!inst->dst.is_null()) { - int written = inst->regs_written; - int dst_width = inst->dst.width / 8; - assert(written == entry->generator->regs_written); - assert(dst_width == entry->generator->dst.width / 8); + assert(inst->size_written == entry->generator->size_written); assert(inst->dst.type == entry->tmp.type); - fs_reg dst = inst->dst; - fs_reg tmp = entry->tmp; - fs_inst *copy; - if (written > dst_width) { - fs_reg *sources = ralloc_array(mem_ctx, fs_reg, written / dst_width); - for (int i = 0; i < written / dst_width; i++) - sources[i] = offset(tmp, i); - copy = LOAD_PAYLOAD(dst, sources, written / dst_width); - } else { - copy = MOV(dst, tmp); - copy->force_writemask_all = inst->force_writemask_all; - } - inst->insert_before(block, copy); + const fs_builder ibld(this, block, inst); + + create_copy_instr(ibld, inst, entry->tmp, negate); } /* Set our iterator so that next time through the loop inst->next @@ -272,10 +318,11 @@ fs_visitor::opt_cse_local(bblock_t *block) /* Kill all AEB entries that write a different value to or read from * the flag register if we just wrote it. */ - if (inst->writes_flag()) { - if (entry->generator->reads_flag() || - (entry->generator->writes_flag() && - !instructions_match(inst, entry->generator))) { + if (inst->flags_written()) { + bool negate; /* dummy */ + if (entry->generator->flags_read(devinfo) || + (entry->generator->flags_written() && + !instructions_match(inst, entry->generator, &negate))) { entry->remove(); ralloc_free(entry); continue; @@ -288,7 +335,9 @@ fs_visitor::opt_cse_local(bblock_t *block) /* Kill all AEB entries that use the destination we just * overwrote. */ - if (inst->overwrites_reg(entry->generator->src[i])) { + if (regions_overlap(inst->dst, inst->size_written, + entry->generator->src[i], + entry->generator->size_read(i))) { entry->remove(); ralloc_free(entry); break; @@ -297,7 +346,7 @@ fs_visitor::opt_cse_local(bblock_t *block) /* Kill any AEB entries using registers that don't get reused any * more -- a sure sign they'll fail operands_match(). */ - if (src_reg->file == GRF && virtual_grf_end[src_reg->reg] < ip) { + if (src_reg->file == VGRF && virtual_grf_end[src_reg->nr] < ip) { entry->remove(); ralloc_free(entry); break;