X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fi965%2Fbrw_fs_cse.cpp;h=48220efd73040bad690afb28038319aebaafccfd;hb=ed65e6ef49e17e9cae93a8f98e2968346de2bc6e;hp=cde6566c05c043a0e721c7272c9126a7a0b154ce;hpb=9645b8eb1f1b79e537ad8ddb683507df7bc9da58;p=mesa.git diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp index cde6566c05c..48220efd730 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp @@ -71,16 +71,29 @@ is_expression(const fs_visitor *v, const fs_inst *const inst) case BRW_OPCODE_PLN: case BRW_OPCODE_MAD: case BRW_OPCODE_LRP: + case FS_OPCODE_FB_READ_LOGICAL: case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD: + case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL: case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7: - case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD: case FS_OPCODE_CINTERP: case FS_OPCODE_LINTERP: case SHADER_OPCODE_FIND_LIVE_CHANNEL: case SHADER_OPCODE_BROADCAST: - case SHADER_OPCODE_EXTRACT_BYTE: - case SHADER_OPCODE_EXTRACT_WORD: case SHADER_OPCODE_MOV_INDIRECT: + case SHADER_OPCODE_TEX_LOGICAL: + case SHADER_OPCODE_TXD_LOGICAL: + case SHADER_OPCODE_TXF_LOGICAL: + case SHADER_OPCODE_TXL_LOGICAL: + case SHADER_OPCODE_TXS_LOGICAL: + case FS_OPCODE_TXB_LOGICAL: + case SHADER_OPCODE_TXF_CMS_LOGICAL: + case SHADER_OPCODE_TXF_CMS_W_LOGICAL: + case SHADER_OPCODE_TXF_UMS_LOGICAL: + case SHADER_OPCODE_TXF_MCS_LOGICAL: + case SHADER_OPCODE_LOD_LOGICAL: + case SHADER_OPCODE_TG4_LOGICAL: + case SHADER_OPCODE_TG4_OFFSET_LOGICAL: + case FS_OPCODE_PACK: return true; case SHADER_OPCODE_RCP: case SHADER_OPCODE_RSQ: @@ -139,6 +152,8 @@ operands_match(const fs_inst *a, const fs_inst *b, bool *negate) ys[1].f = ys1_imm; *negate = (xs0_negate != xs1_negate) != (ys0_negate != ys1_negate); + if (*negate && (a->saturate || b->saturate)) + return false; return ret; } else if (!a->is_commutative()) { bool match = true; @@ -161,7 +176,7 @@ instructions_match(fs_inst *a, fs_inst *b, bool *negate) return a->opcode == b->opcode && a->force_writemask_all == b->force_writemask_all && a->exec_size == b->exec_size && - a->force_sechalf == b->force_sechalf && + a->group == b->group && a->saturate == b->saturate && a->predicate == b->predicate && a->predicate_inverse == b->predicate_inverse && @@ -170,12 +185,13 @@ instructions_match(fs_inst *a, fs_inst *b, bool *negate) a->dst.type == b->dst.type && a->offset == b->offset && a->mlen == b->mlen && - a->regs_written == b->regs_written && + a->size_written == b->size_written && a->base_mrf == b->base_mrf && a->eot == b->eot && a->header_size == b->header_size && a->shadow_compare == b->shadow_compare && a->pi_noperspective == b->pi_noperspective && + a->target == b->target && a->sources == b->sources && operands_match(a, b, negate); } @@ -183,11 +199,13 @@ instructions_match(fs_inst *a, fs_inst *b, bool *negate) static void create_copy_instr(const fs_builder &bld, fs_inst *inst, fs_reg src, bool negate) { - int written = inst->regs_written; - int dst_width = inst->exec_size / 8; + unsigned written = regs_written(inst); + unsigned dst_width = + DIV_ROUND_UP(inst->dst.component_size(inst->exec_size), REG_SIZE); fs_inst *copy; - if (written > dst_width) { + if (inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD || + written != dst_width) { fs_reg *payload; int sources, header_size; if (inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD) { @@ -203,7 +221,7 @@ create_copy_instr(const fs_builder &bld, fs_inst *inst, fs_reg src, bool negate) payload = ralloc_array(bld.shader->mem_ctx, fs_reg, sources); for (int i = 0; i < header_size; i++) { payload[i] = src; - src.reg_offset++; + src.offset += REG_SIZE; } for (int i = header_size; i < sources; i++) { payload[i] = src; @@ -212,11 +230,11 @@ create_copy_instr(const fs_builder &bld, fs_inst *inst, fs_reg src, bool negate) copy = bld.LOAD_PAYLOAD(inst->dst, payload, sources, header_size); } else { copy = bld.MOV(inst->dst, src); - copy->force_sechalf = inst->force_sechalf; + copy->group = inst->group; copy->force_writemask_all = inst->force_writemask_all; copy->src[0].negate = negate; } - assert(copy->regs_written == written); + assert(regs_written(copy) == written); } bool @@ -266,7 +284,7 @@ fs_visitor::opt_cse_local(bblock_t *block) if (no_existing_temp && !entry->generator->dst.is_null()) { const fs_builder ibld = fs_builder(this, block, entry->generator) .at(block, entry->generator->next); - int written = entry->generator->regs_written; + int written = regs_written(entry->generator); entry->tmp = fs_reg(VGRF, alloc.allocate(written), entry->generator->dst.type); @@ -278,7 +296,7 @@ fs_visitor::opt_cse_local(bblock_t *block) /* dest <- temp */ if (!inst->dst.is_null()) { - assert(inst->regs_written == entry->generator->regs_written); + assert(inst->size_written == entry->generator->size_written); assert(inst->dst.type == entry->tmp.type); const fs_builder ibld(this, block, inst); @@ -300,10 +318,10 @@ fs_visitor::opt_cse_local(bblock_t *block) /* Kill all AEB entries that write a different value to or read from * the flag register if we just wrote it. */ - if (inst->writes_flag()) { + if (inst->flags_written()) { bool negate; /* dummy */ - if (entry->generator->reads_flag() || - (entry->generator->writes_flag() && + if (entry->generator->flags_read(devinfo) || + (entry->generator->flags_written() && !instructions_match(inst, entry->generator, &negate))) { entry->remove(); ralloc_free(entry); @@ -317,7 +335,9 @@ fs_visitor::opt_cse_local(bblock_t *block) /* Kill all AEB entries that use the destination we just * overwrote. */ - if (inst->overwrites_reg(entry->generator->src[i])) { + if (regions_overlap(inst->dst, inst->size_written, + entry->generator->src[i], + entry->generator->size_read(i))) { entry->remove(); ralloc_free(entry); break;