From: Daniel Schürmann Date: Tue, 10 Mar 2020 09:00:32 +0000 (+0100) Subject: aco: improve hashing for value numbering X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=fb5a7902f20ad1285fa875c93bc719a1499d1cb4;p=mesa.git aco: improve hashing for value numbering An improved hashing greatly reduces the number of collisions, and thus, increases the speed for lookups in the hash table. The hash function now uses Murmur3 written by Austin Appleby. This patch also pre-reserves space for the hashmap to avoid rehashing. Reviewed-by: Rhys Perry Part-of: --- diff --git a/src/amd/compiler/aco_opt_value_numbering.cpp b/src/amd/compiler/aco_opt_value_numbering.cpp index 2f5a3b8eec9..4ab3d6d56e5 100644 --- a/src/amd/compiler/aco_opt_value_numbering.cpp +++ b/src/amd/compiler/aco_opt_value_numbering.cpp @@ -34,41 +34,86 @@ namespace aco { namespace { +inline +uint32_t murmur_32_scramble(uint32_t h, uint32_t k) { + k *= 0xcc9e2d51; + k = (k << 15) | (k >> 17); + h ^= k * 0x1b873593; + h = (h << 13) | (h >> 19); + h = h * 5 + 0xe6546b64; + return h; +} + +template +uint32_t hash_murmur_32(Instruction* instr) +{ + uint32_t hash = uint32_t(instr->format) << 16 | uint32_t(instr->opcode); + + for (const Operand& op : instr->operands) + hash = murmur_32_scramble(hash, op.constantValue()); + + /* skip format, opcode and pass_flags */ + for (unsigned i = 2; i < (sizeof(T) >> 2); i++) { + uint32_t u; + /* Accesses it though a byte array, so doesn't violate the strict aliasing rule */ + memcpy(&u, reinterpret_cast(instr) + i * 4, 4); + hash = murmur_32_scramble(hash, u); + } + + /* Finalize. */ + uint32_t len = instr->operands.size() + instr->definitions.size() + sizeof(T); + hash ^= len; + hash ^= hash >> 16; + hash *= 0x85ebca6b; + hash ^= hash >> 13; + hash *= 0xc2b2ae35; + hash ^= hash >> 16; + return hash; +} + struct InstrHash { + /* This hash function uses the Murmur3 algorithm written by Austin Appleby + * https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp + * + * In order to calculate the expression set, only the right-hand-side of an + * instruction is used for the hash, i.e. everything except the definitions. + */ std::size_t operator()(Instruction* instr) const { - uint64_t hash = (uint64_t) instr->opcode + (uint64_t) instr->format; - for (unsigned i = 0; i < instr->operands.size(); i++) { - Operand op = instr->operands[i]; - uint64_t val = op.isTemp() ? op.tempId() : op.isFixed() ? op.physReg() : op.constantValue(); - hash |= val << (i+1) * 8; - } - if (instr->isVOP3()) { - VOP3A_instruction* vop3 = static_cast(instr); - for (unsigned i = 0; i < 3; i++) { - hash ^= vop3->abs[i] << (i*3 + 0); - hash ^= vop3->neg[i] << (i*3 + 2); - } - hash ^= vop3->opsel * 13; - hash ^= (vop3->clamp << 28) * 13; - hash += vop3->omod << 19; - } + if (instr->isVOP3()) + return hash_murmur_32(instr); + + if (instr->isDPP()) + return hash_murmur_32(instr); + switch (instr->format) { case Format::SMEM: - break; - case Format::VINTRP: { - Interp_instruction* interp = static_cast(instr); - hash ^= interp->attribute << 13; - hash ^= interp->component << 27; - break; - } + return hash_murmur_32(instr); + case Format::VINTRP: + return hash_murmur_32(instr); case Format::DS: - break; + return hash_murmur_32(instr); + case Format::SOPP: + return hash_murmur_32(instr); + case Format::SOPK: + return hash_murmur_32(instr); + case Format::EXP: + return hash_murmur_32(instr); + case Format::MUBUF: + return hash_murmur_32(instr); + case Format::MIMG: + return hash_murmur_32(instr); + case Format::MTBUF: + return hash_murmur_32(instr); + case Format::FLAT: + return hash_murmur_32(instr); + case Format::PSEUDO_BRANCH: + return hash_murmur_32(instr); + case Format::PSEUDO_REDUCTION: + return hash_murmur_32(instr); default: - break; + return hash_murmur_32(instr); } - - return hash; } }; @@ -272,7 +317,13 @@ struct vn_ctx { */ uint32_t exec_id = 1; - vn_ctx(Program* program) : program(program) {} + vn_ctx(Program* program) : program(program) { + static_assert(sizeof(Temp) == 4, "Temp must fit in 32bits"); + unsigned size = 0; + for (Block& block : program->blocks) + size += block.instructions.size(); + expr_values.reserve(size); + } };