From fb5a7902f20ad1285fa875c93bc719a1499d1cb4 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Daniel=20Sch=C3=BCrmann?= Date: Tue, 10 Mar 2020 10:00:32 +0100 Subject: [PATCH] aco: improve hashing for value numbering An improved hashing greatly reduces the number of collisions, and thus, increases the speed for lookups in the hash table. The hash function now uses Murmur3 written by Austin Appleby. This patch also pre-reserves space for the hashmap to avoid rehashing. Reviewed-by: Rhys Perry Part-of: --- src/amd/compiler/aco_opt_value_numbering.cpp | 107 ++++++++++++++----- 1 file changed, 79 insertions(+), 28 deletions(-) diff --git a/src/amd/compiler/aco_opt_value_numbering.cpp b/src/amd/compiler/aco_opt_value_numbering.cpp index 2f5a3b8eec9..4ab3d6d56e5 100644 --- a/src/amd/compiler/aco_opt_value_numbering.cpp +++ b/src/amd/compiler/aco_opt_value_numbering.cpp @@ -34,41 +34,86 @@ namespace aco { namespace { +inline +uint32_t murmur_32_scramble(uint32_t h, uint32_t k) { + k *= 0xcc9e2d51; + k = (k << 15) | (k >> 17); + h ^= k * 0x1b873593; + h = (h << 13) | (h >> 19); + h = h * 5 + 0xe6546b64; + return h; +} + +template +uint32_t hash_murmur_32(Instruction* instr) +{ + uint32_t hash = uint32_t(instr->format) << 16 | uint32_t(instr->opcode); + + for (const Operand& op : instr->operands) + hash = murmur_32_scramble(hash, op.constantValue()); + + /* skip format, opcode and pass_flags */ + for (unsigned i = 2; i < (sizeof(T) >> 2); i++) { + uint32_t u; + /* Accesses it though a byte array, so doesn't violate the strict aliasing rule */ + memcpy(&u, reinterpret_cast(instr) + i * 4, 4); + hash = murmur_32_scramble(hash, u); + } + + /* Finalize. */ + uint32_t len = instr->operands.size() + instr->definitions.size() + sizeof(T); + hash ^= len; + hash ^= hash >> 16; + hash *= 0x85ebca6b; + hash ^= hash >> 13; + hash *= 0xc2b2ae35; + hash ^= hash >> 16; + return hash; +} + struct InstrHash { + /* This hash function uses the Murmur3 algorithm written by Austin Appleby + * https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp + * + * In order to calculate the expression set, only the right-hand-side of an + * instruction is used for the hash, i.e. everything except the definitions. + */ std::size_t operator()(Instruction* instr) const { - uint64_t hash = (uint64_t) instr->opcode + (uint64_t) instr->format; - for (unsigned i = 0; i < instr->operands.size(); i++) { - Operand op = instr->operands[i]; - uint64_t val = op.isTemp() ? op.tempId() : op.isFixed() ? op.physReg() : op.constantValue(); - hash |= val << (i+1) * 8; - } - if (instr->isVOP3()) { - VOP3A_instruction* vop3 = static_cast(instr); - for (unsigned i = 0; i < 3; i++) { - hash ^= vop3->abs[i] << (i*3 + 0); - hash ^= vop3->neg[i] << (i*3 + 2); - } - hash ^= vop3->opsel * 13; - hash ^= (vop3->clamp << 28) * 13; - hash += vop3->omod << 19; - } + if (instr->isVOP3()) + return hash_murmur_32(instr); + + if (instr->isDPP()) + return hash_murmur_32(instr); + switch (instr->format) { case Format::SMEM: - break; - case Format::VINTRP: { - Interp_instruction* interp = static_cast(instr); - hash ^= interp->attribute << 13; - hash ^= interp->component << 27; - break; - } + return hash_murmur_32(instr); + case Format::VINTRP: + return hash_murmur_32(instr); case Format::DS: - break; + return hash_murmur_32(instr); + case Format::SOPP: + return hash_murmur_32(instr); + case Format::SOPK: + return hash_murmur_32(instr); + case Format::EXP: + return hash_murmur_32(instr); + case Format::MUBUF: + return hash_murmur_32(instr); + case Format::MIMG: + return hash_murmur_32(instr); + case Format::MTBUF: + return hash_murmur_32(instr); + case Format::FLAT: + return hash_murmur_32(instr); + case Format::PSEUDO_BRANCH: + return hash_murmur_32(instr); + case Format::PSEUDO_REDUCTION: + return hash_murmur_32(instr); default: - break; + return hash_murmur_32(instr); } - - return hash; } }; @@ -272,7 +317,13 @@ struct vn_ctx { */ uint32_t exec_id = 1; - vn_ctx(Program* program) : program(program) {} + vn_ctx(Program* program) : program(program) { + static_assert(sizeof(Temp) == 4, "Temp must fit in 32bits"); + unsigned size = 0; + for (Block& block : program->blocks) + size += block.instructions.size(); + expr_values.reserve(size); + } }; -- 2.30.2