X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Famd%2Fcompiler%2Faco_opt_value_numbering.cpp;h=93668442d329848b223a347883ef38bd55e6bacf;hb=51bc11abc206ae5ea0946f5a79c68527701c24e0;hp=708987d4285ee3ec766345bc92e570d7d8c3acaa;hpb=b6905438514ae4de0b7f85c861e3d811ddaadda9;p=mesa.git

diff --git a/src/amd/compiler/aco_opt_value_numbering.cpp b/src/amd/compiler/aco_opt_value_numbering.cpp
index 708987d4285..93668442d32 100644
--- a/src/amd/compiler/aco_opt_value_numbering.cpp
+++ b/src/amd/compiler/aco_opt_value_numbering.cpp
@@ -34,41 +34,89 @@ namespace aco {
 namespace {
 
+inline
+uint32_t murmur_32_scramble(uint32_t h, uint32_t k) {
+   k *= 0xcc9e2d51;
+   k = (k << 15) | (k >> 17);
+   h ^= k * 0x1b873593;
+   h = (h << 13) | (h >> 19);
+   h = h * 5 + 0xe6546b64;
+   return h;
+}
+
+template<typename T>
+uint32_t hash_murmur_32(Instruction* instr)
+{
+   uint32_t hash = uint32_t(instr->format) << 16 | uint32_t(instr->opcode);
+
+   for (const Operand& op : instr->operands)
+      hash = murmur_32_scramble(hash, op.constantValue());
+
+   /* skip format, opcode and pass_flags */
+   for (unsigned i = 2; i < (sizeof(T) >> 2); i++) {
+      uint32_t u;
+      /* Accesses it through a byte array, so it doesn't violate the strict aliasing rule */
+      memcpy(&u, reinterpret_cast<uint8_t*>(instr) + i * 4, 4);
+      hash = murmur_32_scramble(hash, u);
+   }
+
+   /* Finalize. */
+   uint32_t len = instr->operands.size() + instr->definitions.size() + sizeof(T);
+   hash ^= len;
+   hash ^= hash >> 16;
+   hash *= 0x85ebca6b;
+   hash ^= hash >> 13;
+   hash *= 0xc2b2ae35;
+   hash ^= hash >> 16;
+   return hash;
+}
+
 struct InstrHash {
+   /* This hash function uses the Murmur3 algorithm written by Austin Appleby
+    * https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp
+    *
+    * In order to calculate the expression set, only the right-hand side of an
+    * instruction is used for the hash, i.e. everything except the definitions.
+    */
    std::size_t operator()(Instruction* instr) const
    {
-      uint64_t hash = (uint64_t) instr->opcode + (uint64_t) instr->format;
-      for (unsigned i = 0; i < instr->operands.size(); i++) {
-         Operand op = instr->operands[i];
-         uint64_t val = op.isTemp() ? op.tempId() : op.isFixed() ? op.physReg() : op.constantValue();
-         hash |= val << (i+1) * 8;
-      }
-      if (instr->isVOP3()) {
-         VOP3A_instruction* vop3 = static_cast<VOP3A_instruction*>(instr);
-         for (unsigned i = 0; i < 3; i++) {
-            hash ^= vop3->abs[i] << (i*3 + 0);
-            hash ^= vop3->opsel[i] << (i*3 + 1);
-            hash ^= vop3->neg[i] << (i*3 + 2);
-         }
-         hash ^= (vop3->clamp << 28) * 13;
-         hash += vop3->omod << 19;
-      }
+      if (instr->isVOP3())
+         return hash_murmur_32<VOP3A_instruction>(instr);
+
+      if (instr->isDPP())
+         return hash_murmur_32<DPP_instruction>(instr);
+
+      if (instr->isSDWA())
+         return hash_murmur_32<SDWA_instruction>(instr);
+
       switch (instr->format) {
       case Format::SMEM:
-         break;
-      case Format::VINTRP: {
-         Interp_instruction* interp = static_cast<Interp_instruction*>(instr);
-         hash ^= interp->attribute << 13;
-         hash ^= interp->component << 27;
-         break;
-      }
+         return hash_murmur_32<SMEM_instruction>(instr);
+      case Format::VINTRP:
+         return hash_murmur_32<Interp_instruction>(instr);
       case Format::DS:
-         break;
+         return hash_murmur_32<DS_instruction>(instr);
+      case Format::SOPP:
+         return hash_murmur_32<SOPP_instruction>(instr);
+      case Format::SOPK:
+         return hash_murmur_32<SOPK_instruction>(instr);
+      case Format::EXP:
+         return hash_murmur_32<Export_instruction>(instr);
+      case Format::MUBUF:
+         return hash_murmur_32<MUBUF_instruction>(instr);
+      case Format::MIMG:
+         return hash_murmur_32<MIMG_instruction>(instr);
+      case Format::MTBUF:
+         return hash_murmur_32<MTBUF_instruction>(instr);
+      case Format::FLAT:
+         return hash_murmur_32<FLAT_instruction>(instr);
+      case Format::PSEUDO_BRANCH:
+         return hash_murmur_32<Pseudo_branch_instruction>(instr);
+      case Format::PSEUDO_REDUCTION:
+         return hash_murmur_32<Pseudo_reduction_instruction>(instr);
       default:
-         break;
+         return hash_murmur_32<Instruction>(instr);
       }
-
-      return hash;
    }
 };
 
@@ -134,12 +182,12 @@ struct InstrPred {
          VOP3A_instruction* b3 = static_cast<VOP3A_instruction*>(b);
          for (unsigned i = 0; i < 3; i++) {
             if (a3->abs[i] != b3->abs[i] ||
-                a3->opsel[i] != b3->opsel[i] ||
                 a3->neg[i] != b3->neg[i])
                return false;
          }
          return a3->clamp == b3->clamp &&
-                a3->omod == b3->omod;
+                a3->omod == b3->omod &&
+                a3->opsel == b3->opsel;
       }
       if (a->isDPP()) {
          DPP_instruction* aDPP = static_cast<DPP_instruction*>(a);
@@ -154,6 +202,20 @@ struct InstrPred {
                aDPP->neg[0] == bDPP->neg[0] &&
                aDPP->neg[1] == bDPP->neg[1];
       }
+      if (a->isSDWA()) {
+         SDWA_instruction* aSDWA = static_cast<SDWA_instruction*>(a);
+         SDWA_instruction* bSDWA = static_cast<SDWA_instruction*>(b);
+         return aSDWA->sel[0] == bSDWA->sel[0] &&
+                aSDWA->sel[1] == bSDWA->sel[1] &&
+                aSDWA->dst_sel == bSDWA->dst_sel &&
+                aSDWA->abs[0] == bSDWA->abs[0] &&
+                aSDWA->abs[1] == bSDWA->abs[1] &&
+                aSDWA->neg[0] == bSDWA->neg[0] &&
+                aSDWA->neg[1] == bSDWA->neg[1] &&
+                aSDWA->dst_preserve == bSDWA->dst_preserve &&
+                aSDWA->clamp == bSDWA->clamp &&
+                aSDWA->omod == bSDWA->omod;
+      }
 
       switch (a->format) {
       case Format::SOPK: {
@@ -184,7 +246,6 @@ struct InstrPred {
                 aR->cluster_size == bR->cluster_size;
       }
       case Format::MTBUF: {
-         /* this is fine since they are only used for vertex input fetches */
         MTBUF_instruction* aM = static_cast<MTBUF_instruction*>(a);
         MTBUF_instruction* bM = static_cast<MTBUF_instruction*>(b);
         return aM->can_reorder && bM->can_reorder &&
@@ -195,12 +256,27 @@ struct InstrPred {
                aM->offen == bM->offen &&
                aM->idxen == bM->idxen &&
                aM->glc == bM->glc &&
+               aM->dlc == bM->dlc &&
                aM->slc == bM->slc &&
                aM->tfe == bM->tfe &&
                aM->disable_wqm == bM->disable_wqm;
      }
+      case Format::MUBUF: {
+         MUBUF_instruction* aM = static_cast<MUBUF_instruction*>(a);
+         MUBUF_instruction* bM = static_cast<MUBUF_instruction*>(b);
+         return aM->can_reorder && bM->can_reorder &&
+                aM->barrier == bM->barrier &&
+                aM->offset == bM->offset &&
+                aM->offen == bM->offen &&
+                aM->idxen == bM->idxen &&
+                aM->glc == bM->glc &&
+                aM->dlc == bM->dlc &&
+                aM->slc == bM->slc &&
+                aM->tfe == bM->tfe &&
+                aM->lds == bM->lds &&
+                aM->disable_wqm == bM->disable_wqm;
+      }
       /* we want to optimize these in NIR and don't hassle with load-store dependencies */
-      case Format::MUBUF:
       case Format::FLAT:
       case Format::GLOBAL:
       case Format::SCRATCH:
@@ -258,7 +334,13 @@ struct vn_ctx {
     */
    uint32_t exec_id = 1;
 
-   vn_ctx(Program* program) : program(program) {}
+   vn_ctx(Program* program) : program(program) {
+      static_assert(sizeof(Temp) == 4, "Temp must fit in 32bits");
+      unsigned size = 0;
+      for (Block& block : program->blocks)
+         size += block.instructions.size();
+      expr_values.reserve(size);
+   }
 };
 
 
@@ -319,6 +401,8 @@ void process_block(vn_ctx& ctx, Block& block)
          assert(instr->definitions[i].regClass() == orig_instr->definitions[i].regClass());
          assert(instr->definitions[i].isTemp());
          ctx.renames[instr->definitions[i].tempId()] = orig_instr->definitions[i].getTemp();
+         if (instr->definitions[i].isPrecise())
+            orig_instr->definitions[i].setPrecise(true);
       }
    } else {
       ctx.expr_values.erase(res.first);
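
Editor's note: to illustrate the hashing scheme this patch introduces, here is a minimal standalone sketch (not part of the patch, and not mesa code). FakeInstr and hash_pod are hypothetical stand-ins for an aco instruction type and hash_murmur_32<T>. The sketch shows the same pattern: seed the hash from format/opcode, then walk the struct's remaining bytes dword by dword via memcpy starting at the third dword, so the first two dwords (format/opcode and pass_flags in the real instruction layout) never influence the result.

#include <cstdint>
#include <cstdio>
#include <cstring>

/* Same scramble/mix step as in the patch (Murmur3, Austin Appleby). */
static inline uint32_t murmur_32_scramble(uint32_t h, uint32_t k) {
   k *= 0xcc9e2d51;
   k = (k << 15) | (k >> 17);
   h ^= k * 0x1b873593;
   h = (h << 13) | (h >> 19);
   return h * 5 + 0xe6546b64;
}

/* Hypothetical trivially-copyable stand-in for an instruction. */
struct FakeInstr {
   uint16_t format, opcode;   /* dword 0: feeds the seed */
   uint32_t pass_flags;       /* dword 1: skipped by the hash */
   uint32_t payload[2];       /* dwords 2-3: fields that feed the hash */
};

static uint32_t hash_pod(const FakeInstr& in) {
   uint32_t hash = uint32_t(in.format) << 16 | in.opcode;
   /* start at dword 2, skipping format/opcode and pass_flags */
   for (unsigned i = 2; i < (sizeof(FakeInstr) >> 2); i++) {
      uint32_t u;
      /* byte-wise copy avoids strict-aliasing violations */
      memcpy(&u, reinterpret_cast<const uint8_t*>(&in) + i * 4, 4);
      hash = murmur_32_scramble(hash, u);
   }
   /* finalize, mirroring hash_murmur_32 */
   hash ^= sizeof(FakeInstr);
   hash ^= hash >> 16;
   hash *= 0x85ebca6b;
   hash ^= hash >> 13;
   hash *= 0xc2b2ae35;
   hash ^= hash >> 16;
   return hash;
}

int main() {
   FakeInstr a{1, 42, 0xdead, {7, 9}};
   FakeInstr b = a;
   b.pass_flags = 0xbeef;   /* differs only in the skipped dword */
   printf("%08x %08x\n", hash_pod(a), hash_pod(b));   /* prints identical hashes */
}

Skipping fields in the hash is safe because hash-equal instructions are still run through InstrPred for a full field-by-field comparison; an over-coarse hash can only affect bucket distribution, never correctness.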