From: Gabe Black Date: Sun, 15 Apr 2012 06:24:18 +0000 (-0700) Subject: X86: Use the AddrTrie class to implement the TLB. X-Git-Tag: stable_2012_06_28~120 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=aacb676220ac1e6049304bef31a39090487da71e;p=gem5.git X86: Use the AddrTrie class to implement the TLB. This change also adjusts the TlbEntry class so that it stores the number of address bits wide a page is rather than its size in bytes. In other words, instead of storing 4K for a 4K page, it stores 12. 12 is easy to turn into 4K, but it's a little harder going the other way. --- diff --git a/src/arch/x86/pagetable.cc b/src/arch/x86/pagetable.cc index a7717def7..bfd2efe61 100644 --- a/src/arch/x86/pagetable.cc +++ b/src/arch/x86/pagetable.cc @@ -45,8 +45,8 @@ namespace X86ISA { TlbEntry::TlbEntry(Addr asn, Addr _vaddr, Addr _paddr) : - paddr(_paddr), vaddr(_vaddr), size(PageBytes), writable(true), user(true), - uncacheable(false), global(false), patBit(0), noExec(false) + paddr(_paddr), vaddr(_vaddr), logBytes(PageShift), writable(true), + user(true), uncacheable(false), global(false), patBit(0), noExec(false) {} void @@ -54,13 +54,14 @@ TlbEntry::serialize(std::ostream &os) { SERIALIZE_SCALAR(paddr); SERIALIZE_SCALAR(vaddr); - SERIALIZE_SCALAR(size); + SERIALIZE_SCALAR(logBytes); SERIALIZE_SCALAR(writable); SERIALIZE_SCALAR(user); SERIALIZE_SCALAR(uncacheable); SERIALIZE_SCALAR(global); SERIALIZE_SCALAR(patBit); SERIALIZE_SCALAR(noExec); + SERIALIZE_SCALAR(lruSeq); } void @@ -68,13 +69,14 @@ TlbEntry::unserialize(Checkpoint *cp, const std::string &section) { UNSERIALIZE_SCALAR(paddr); UNSERIALIZE_SCALAR(vaddr); - UNSERIALIZE_SCALAR(size); + UNSERIALIZE_SCALAR(logBytes); UNSERIALIZE_SCALAR(writable); UNSERIALIZE_SCALAR(user); UNSERIALIZE_SCALAR(uncacheable); UNSERIALIZE_SCALAR(global); UNSERIALIZE_SCALAR(patBit); UNSERIALIZE_SCALAR(noExec); + UNSERIALIZE_SCALAR(lruSeq); } } diff --git a/src/arch/x86/pagetable.hh b/src/arch/x86/pagetable.hh index 
768de65bc..8a6e71f3b 100644 --- a/src/arch/x86/pagetable.hh +++ b/src/arch/x86/pagetable.hh @@ -46,9 +46,17 @@ #include "base/bitunion.hh" #include "base/misc.hh" #include "base/types.hh" +#include "base/trie.hh" class Checkpoint; +namespace X86ISA +{ + struct TlbEntry; +} + +typedef Trie<Addr, X86ISA::TlbEntry> TlbEntryTrie; + namespace X86ISA { BitUnion64(VAddr) @@ -72,8 +80,8 @@ namespace X86ISA // The beginning of the virtual page this entry maps. Addr vaddr; - // The size of the page this entry represents. - Addr size; + // The size of the page this represents, in address bits. + unsigned logBytes; // Read permission is always available, assuming it isn't blocked by // other mechanisms. @@ -91,6 +99,10 @@ namespace X86ISA bool patBit; // Whether or not memory on this page can be executed. bool noExec; + // A sequence number to keep track of LRU. + uint64_t lruSeq; + + TlbEntryTrie::Handle trieHandle; TlbEntry(Addr asn, Addr _vaddr, Addr _paddr); TlbEntry() {} diff --git a/src/arch/x86/pagetable_walker.cc b/src/arch/x86/pagetable_walker.cc index 9e1d08c7b..960954f15 100644 --- a/src/arch/x86/pagetable_walker.cc +++ b/src/arch/x86/pagetable_walker.cc @@ -54,12 +54,12 @@ #include "arch/x86/tlb.hh" #include "arch/x86/vtophys.hh" #include "base/bitfield.hh" +#include "base/trie.hh" #include "cpu/base.hh" #include "cpu/thread_context.hh" #include "debug/PageTableWalker.hh" #include "mem/packet_access.hh" #include "mem/request.hh" -#include "sim/system.hh" namespace X86ISA { @@ -106,11 +106,11 @@ Walker::start(ThreadContext * _tc, BaseTLB::Translation *_translation, } Fault -Walker::startFunctional(ThreadContext * _tc, Addr &addr, Addr &pageSize, +Walker::startFunctional(ThreadContext * _tc, Addr &addr, unsigned &logBytes, BaseTLB::Mode _mode) { funcState.initState(_tc, _mode); - return funcState.startFunctional(addr, pageSize); + return funcState.startFunctional(addr, logBytes); } bool @@ -224,7 +224,7 @@ Walker::WalkerState::startWalk() } Fault -Walker::WalkerState::startFunctional(Addr 
&addr, Addr &pageSize) +Walker::WalkerState::startFunctional(Addr &addr, unsigned &logBytes) { Fault fault = NoFault; assert(started == false); @@ -241,7 +241,7 @@ Walker::WalkerState::startFunctional(Addr &addr, Addr &pageSize) state = nextState; nextState = Ready; } while(read); - pageSize = entry.size; + logBytes = entry.logBytes; addr = entry.paddr; return fault; @@ -311,14 +311,14 @@ Walker::WalkerState::stepWalk(PacketPtr &write) } if (!pte.ps) { // 4 KB page - entry.size = 4 * (1 << 10); + entry.logBytes = 12; nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.longl1 * dataSize; nextState = LongPTE; break; } else { // 2 MB page - entry.size = 2 * (1 << 20); + entry.logBytes = 21; entry.paddr = (uint64_t)pte & (mask(31) << 21); entry.uncacheable = uncacheable; entry.global = pte.g; @@ -373,13 +373,13 @@ Walker::WalkerState::stepWalk(PacketPtr &write) } if (!pte.ps) { // 4 KB page - entry.size = 4 * (1 << 10); + entry.logBytes = 12; nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.pael1 * dataSize; nextState = PAEPTE; break; } else { // 2 MB page - entry.size = 2 * (1 << 20); + entry.logBytes = 21; entry.paddr = (uint64_t)pte & (mask(31) << 21); entry.uncacheable = uncacheable; entry.global = pte.g; @@ -423,14 +423,14 @@ Walker::WalkerState::stepWalk(PacketPtr &write) } if (!pte.ps) { // 4 KB page - entry.size = 4 * (1 << 10); + entry.logBytes = 12; nextRead = ((uint64_t)pte & (mask(20) << 12)) + vaddr.norml2 * dataSize; nextState = PTE; break; } else { // 4 MB page - entry.size = 4 * (1 << 20); + entry.logBytes = 22; entry.paddr = bits(pte, 20, 13) << 32 | bits(pte, 31, 22) << 22; entry.uncacheable = uncacheable; entry.global = pte.g; @@ -453,7 +453,7 @@ Walker::WalkerState::stepWalk(PacketPtr &write) break; } // 4 KB page - entry.size = 4 * (1 << 10); + entry.logBytes = 12; nextRead = ((uint64_t)pte & (mask(20) << 12)) + vaddr.norml2 * dataSize; nextState = PTE; break; diff --git a/src/arch/x86/pagetable_walker.hh b/src/arch/x86/pagetable_walker.hh 
index 3cc20b6cd..c4c770884 100644 --- a/src/arch/x86/pagetable_walker.hh +++ b/src/arch/x86/pagetable_walker.hh @@ -132,7 +132,7 @@ namespace X86ISA void initState(ThreadContext * _tc, BaseTLB::Mode _mode, bool _isTiming = false); Fault startWalk(); - Fault startFunctional(Addr &addr, Addr &pageSize); + Fault startFunctional(Addr &addr, unsigned &logBytes); bool recvPacket(PacketPtr pkt); bool isRetrying(); bool wasStarted(); @@ -169,7 +169,7 @@ namespace X86ISA Fault start(ThreadContext * _tc, BaseTLB::Translation *translation, RequestPtr req, BaseTLB::Mode mode); Fault startFunctional(ThreadContext * _tc, Addr &addr, - Addr &pageSize, BaseTLB::Mode mode); + unsigned &logBytes, BaseTLB::Mode mode); MasterPort &getMasterPort(const std::string &if_name, int idx = -1); protected: diff --git a/src/arch/x86/tlb.cc b/src/arch/x86/tlb.cc index 100f8cf0f..caa3efc1e 100644 --- a/src/arch/x86/tlb.cc +++ b/src/arch/x86/tlb.cc @@ -60,75 +60,79 @@ namespace X86ISA { -TLB::TLB(const Params *p) : BaseTLB(p), configAddress(0), size(p->size) +TLB::TLB(const Params *p) : BaseTLB(p), configAddress(0), size(p->size), + lruSeq(0) { + if (!size) + fatal("TLBs must have a non-zero size.\n"); tlb = new TlbEntry[size]; std::memset(tlb, 0, sizeof(TlbEntry) * size); - for (int x = 0; x < size; x++) + for (int x = 0; x < size; x++) { + tlb[x].trieHandle = NULL; freeList.push_back(&tlb[x]); + } walker = p->walker; walker->setTLB(this); } +void +TLB::evictLRU() +{ + // Find the entry with the lowest (and hence least recently updated) + // sequence number. 
+ + unsigned lru = 0; + for (unsigned i = 1; i < size; i++) { + if (tlb[i].lruSeq < tlb[lru].lruSeq) + lru = i; + } + + assert(tlb[lru].trieHandle); + trie.remove(tlb[lru].trieHandle); + tlb[lru].trieHandle = NULL; + freeList.push_back(&tlb[lru]); +} + TlbEntry * TLB::insert(Addr vpn, TlbEntry &entry) { //TODO Deal with conflicting entries TlbEntry *newEntry = NULL; - if (!freeList.empty()) { - newEntry = freeList.front(); - freeList.pop_front(); - } else { - newEntry = entryList.back(); - entryList.pop_back(); - } + if (freeList.empty()) + evictLRU(); + newEntry = freeList.front(); + freeList.pop_front(); + *newEntry = entry; + newEntry->lruSeq = nextSeq(); newEntry->vaddr = vpn; - entryList.push_front(newEntry); + newEntry->trieHandle = + trie.insert(vpn, TlbEntryTrie::MaxBits - entry.logBytes, newEntry); return newEntry; } -TLB::EntryList::iterator -TLB::lookupIt(Addr va, bool update_lru) -{ - //TODO make this smarter at some point - EntryList::iterator entry; - for (entry = entryList.begin(); entry != entryList.end(); entry++) { - if ((*entry)->vaddr <= va && (*entry)->vaddr + (*entry)->size > va) { - DPRINTF(TLB, "Matched vaddr %#x to entry starting at %#x " - "with size %#x.\n", va, (*entry)->vaddr, (*entry)->size); - if (update_lru) { - entryList.push_front(*entry); - entryList.erase(entry); - entry = entryList.begin(); - } - break; - } - } - return entry; -} - TlbEntry * TLB::lookup(Addr va, bool update_lru) { - EntryList::iterator entry = lookupIt(va, update_lru); - if (entry == entryList.end()) - return NULL; - else - return *entry; + TlbEntry *entry = trie.lookup(va); + if (entry && update_lru) + entry->lruSeq = nextSeq(); + return entry; } void TLB::invalidateAll() { DPRINTF(TLB, "Invalidating all entries.\n"); - while (!entryList.empty()) { - TlbEntry *entry = entryList.front(); - entryList.pop_front(); - freeList.push_back(entry); + for (unsigned i = 0; i < size; i++) { + if (tlb[i].trieHandle) { + trie.remove(tlb[i].trieHandle); + tlb[i].trieHandle = 
NULL; + freeList.push_back(&tlb[i]); + } } } @@ -142,13 +146,11 @@ void TLB::invalidateNonGlobal() { DPRINTF(TLB, "Invalidating all non global entries.\n"); - EntryList::iterator entryIt; - for (entryIt = entryList.begin(); entryIt != entryList.end();) { - if (!(*entryIt)->global) { - freeList.push_back(*entryIt); - entryList.erase(entryIt++); - } else { - entryIt++; + for (unsigned i = 0; i < size; i++) { + if (tlb[i].trieHandle && !tlb[i].global) { + trie.remove(tlb[i].trieHandle); + tlb[i].trieHandle = NULL; + freeList.push_back(&tlb[i]); } } } @@ -156,10 +158,11 @@ TLB::invalidateNonGlobal() void TLB::demapPage(Addr va, uint64_t asn) { - EntryList::iterator entry = lookupIt(va, false); - if (entry != entryList.end()) { - freeList.push_back(*entry); - entryList.erase(entry); + TlbEntry *entry = trie.lookup(va); + if (entry) { + trie.remove(entry->trieHandle); + entry->trieHandle = NULL; + freeList.push_back(entry); } } @@ -345,7 +348,7 @@ TLB::translate(RequestPtr req, ThreadContext *tc, Translation *translation, return new PageFault(vaddr, true, Write, inUser, false); } - Addr paddr = entry->paddr | (vaddr & (entry->size-1)); + Addr paddr = entry->paddr | (vaddr & mask(entry->logBytes)); DPRINTF(TLB, "Translated %#x -> %#x.\n", vaddr, paddr); req->setPaddr(paddr); if (entry->uncacheable) diff --git a/src/arch/x86/tlb.hh b/src/arch/x86/tlb.hh index bcadda762..1d1204cfe 100644 --- a/src/arch/x86/tlb.hh +++ b/src/arch/x86/tlb.hh @@ -46,6 +46,7 @@ #include "arch/x86/regs/segment.hh" #include "arch/x86/pagetable.hh" +#include "base/trie.hh" #include "mem/mem_object.hh" #include "mem/request.hh" #include "params/X86TLB.hh" @@ -103,6 +104,9 @@ namespace X86ISA EntryList freeList; EntryList entryList; + TlbEntryTrie trie; + uint64_t lruSeq; + Fault translateInt(RequestPtr req, ThreadContext *tc); Fault translate(RequestPtr req, ThreadContext *tc, @@ -111,6 +115,14 @@ namespace X86ISA public: + void evictLRU(); + + uint64_t + nextSeq() + { + return ++lruSeq; + } + Fault 
translateAtomic(RequestPtr req, ThreadContext *tc, Mode mode); void translateTiming(RequestPtr req, ThreadContext *tc, Translation *translation, Mode mode); diff --git a/src/arch/x86/vtophys.cc b/src/arch/x86/vtophys.cc index e4abfca59..9fd9cc78d 100644 --- a/src/arch/x86/vtophys.cc +++ b/src/arch/x86/vtophys.cc @@ -61,12 +61,13 @@ namespace X86ISA vtophys(ThreadContext *tc, Addr vaddr) { Walker *walker = tc->getDTBPtr()->getWalker(); - Addr size; + unsigned logBytes; Addr addr = vaddr; - Fault fault = walker->startFunctional(tc, addr, size, BaseTLB::Read); + Fault fault = walker->startFunctional( + tc, addr, logBytes, BaseTLB::Read); if (fault != NoFault) panic("vtophys page walk returned fault\n"); - Addr masked_addr = vaddr & (size - 1); + Addr masked_addr = vaddr & mask(logBytes); Addr paddr = addr | masked_addr; DPRINTF(VtoPhys, "vtophys(%#x) -> %#x\n", vaddr, paddr); return paddr;