From 40fdba2454c219902db7ad1abd28593de8611c2b Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Wed, 25 Feb 2009 10:16:21 -0800 Subject: [PATCH] X86: Make the X86 TLB take advantage of delayed translations, and get rid of the fake TLB miss faults. --- src/arch/x86/faults.cc | 50 --------- src/arch/x86/faults.hh | 32 ------ src/arch/x86/pagetable_walker.cc | 170 ++++++++++++++++++------------- src/arch/x86/pagetable_walker.hh | 26 ++++- src/arch/x86/tlb.cc | 86 +++++++++++----- src/arch/x86/tlb.hh | 15 +-- 6 files changed, 187 insertions(+), 192 deletions(-) diff --git a/src/arch/x86/faults.cc b/src/arch/x86/faults.cc index f01197f36..964eb0a7f 100644 --- a/src/arch/x86/faults.cc +++ b/src/arch/x86/faults.cc @@ -163,56 +163,6 @@ namespace X86ISA } } - void FakeITLBFault::invoke(ThreadContext * tc) - { - // Start the page table walker. - tc->getITBPtr()->walk(tc, vaddr, write, execute); - } - - void FakeDTLBFault::invoke(ThreadContext * tc) - { - // Start the page table walker. - tc->getDTBPtr()->walk(tc, vaddr, write, execute); - } - -#else // !FULL_SYSTEM - void FakeITLBFault::invoke(ThreadContext * tc) - { - DPRINTF(TLB, "Invoking an ITLB fault for address %#x at pc %#x.\n", - vaddr, tc->readPC()); - Process *p = tc->getProcessPtr(); - TlbEntry entry; - bool success = p->pTable->lookup(vaddr, entry); - if(!success) { - panic("Tried to execute unmapped address %#x.\n", vaddr); - } else { - Addr alignedVaddr = p->pTable->pageAlign(vaddr); - DPRINTF(TLB, "Mapping %#x to %#x\n", alignedVaddr, - entry.pageStart()); - tc->getITBPtr()->insert(alignedVaddr, entry); - } - } - - void FakeDTLBFault::invoke(ThreadContext * tc) - { - DPRINTF(TLB, "Invoking an DTLB fault for address %#x at pc %#x.\n", - vaddr, tc->readPC()); - Process *p = tc->getProcessPtr(); - TlbEntry entry; - bool success = p->pTable->lookup(vaddr, entry); - if(!success) { - p->checkAndAllocNextPage(vaddr); - success = p->pTable->lookup(vaddr, entry); - } - if(!success) { - panic("Tried to access unmapped 
address %#x.\n", vaddr); - } else { - Addr alignedVaddr = p->pTable->pageAlign(vaddr); - DPRINTF(TLB, "Mapping %#x to %#x\n", alignedVaddr, - entry.pageStart()); - tc->getDTBPtr()->insert(alignedVaddr, entry); - } - } #endif } // namespace X86ISA diff --git a/src/arch/x86/faults.hh b/src/arch/x86/faults.hh index ae4314434..3753e60e5 100644 --- a/src/arch/x86/faults.hh +++ b/src/arch/x86/faults.hh @@ -422,38 +422,6 @@ namespace X86ISA return true; } }; - - // These faults aren't part of the ISA definition. They trigger filling - // the tlb on a miss and are to take the place of a hardware table walker. - class FakeITLBFault : public X86Fault - { - protected: - Addr vaddr; - bool write; - bool execute; - public: - FakeITLBFault(Addr _vaddr, bool _write, bool _execute) : - X86Fault("fake instruction tlb fault", "itlb", 0), - vaddr(_vaddr), write(_write), execute(_execute) - {} - - void invoke(ThreadContext * tc); - }; - - class FakeDTLBFault : public X86Fault - { - protected: - Addr vaddr; - bool write; - bool execute; - public: - FakeDTLBFault(Addr _vaddr, bool _write, bool _execute) : - X86Fault("fake data tlb fault", "dtlb", 0), - vaddr(_vaddr), write(_write), execute(_execute) - {} - - void invoke(ThreadContext * tc); - }; }; #endif // __ARCH_X86_FAULTS_HH__ diff --git a/src/arch/x86/pagetable_walker.cc b/src/arch/x86/pagetable_walker.cc index b0b9209b5..fe3a4c3bb 100644 --- a/src/arch/x86/pagetable_walker.cc +++ b/src/arch/x86/pagetable_walker.cc @@ -84,7 +84,7 @@ BitUnion64(PageTableEntry) Bitfield<0> p; EndBitUnion(PageTableEntry) -void +Fault Walker::doNext(PacketPtr &read, PacketPtr &write) { assert(state != Ready && state != Waiting); @@ -106,11 +106,11 @@ Walker::doNext(PacketPtr &read, PacketPtr &write) pte.a = 1; entry.writable = pte.w; entry.user = pte.u; - if (badNX) - panic("NX violation!\n"); + if (badNX || !pte.p) { + stop(); + return pageFault(pte.p); + } entry.noExec = pte.nx; - if (!pte.p) - panic("Page at %#x not present!\n", entry.vaddr); 
nextState = LongPDP; break; case LongPDP: @@ -119,10 +119,10 @@ Walker::doNext(PacketPtr &read, PacketPtr &write) pte.a = 1; entry.writable = entry.writable && pte.w; entry.user = entry.user && pte.u; - if (badNX) - panic("NX violation!\n"); - if (!pte.p) - panic("Page at %#x not present!\n", entry.vaddr); + if (badNX || !pte.p) { + stop(); + return pageFault(pte.p); + } nextState = LongPD; break; case LongPD: @@ -130,10 +130,10 @@ Walker::doNext(PacketPtr &read, PacketPtr &write) pte.a = 1; entry.writable = entry.writable && pte.w; entry.user = entry.user && pte.u; - if (badNX) - panic("NX violation!\n"); - if (!pte.p) - panic("Page at %#x not present!\n", entry.vaddr); + if (badNX || !pte.p) { + stop(); + return pageFault(pte.p); + } if (!pte.ps) { // 4 KB page entry.size = 4 * (1 << 10); @@ -150,36 +150,32 @@ Walker::doNext(PacketPtr &read, PacketPtr &write) entry.patBit = bits(pte, 12); entry.vaddr = entry.vaddr & ~((2 * (1 << 20)) - 1); tlb->insert(entry.vaddr, entry); - nextState = Ready; - delete read->req; - delete read; - read = NULL; - return; + stop(); + return NoFault; } case LongPTE: doWrite = !pte.a; pte.a = 1; entry.writable = entry.writable && pte.w; entry.user = entry.user && pte.u; - if (badNX) - panic("NX violation!\n"); - if (!pte.p) - panic("Page at %#x not present!\n", entry.vaddr); + if (badNX || !pte.p) { + stop(); + return pageFault(pte.p); + } entry.paddr = (uint64_t)pte & (mask(40) << 12); entry.uncacheable = uncacheable; entry.global = pte.g; entry.patBit = bits(pte, 12); entry.vaddr = entry.vaddr & ~((4 * (1 << 10)) - 1); tlb->insert(entry.vaddr, entry); - nextState = Ready; - delete read->req; - delete read; - read = NULL; - return; + stop(); + return NoFault; case PAEPDP: nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.pael2 * size; - if (!pte.p) - panic("Page at %#x not present!\n", entry.vaddr); + if (!pte.p) { + stop(); + return pageFault(pte.p); + } nextState = PAEPD; break; case PAEPD: @@ -187,10 +183,10 @@ 
Walker::doNext(PacketPtr &read, PacketPtr &write) pte.a = 1; entry.writable = pte.w; entry.user = pte.u; - if (badNX) - panic("NX violation!\n"); - if (!pte.p) - panic("Page at %#x not present!\n", entry.vaddr); + if (badNX || !pte.p) { + stop(); + return pageFault(pte.p); + } if (!pte.ps) { // 4 KB page entry.size = 4 * (1 << 10); @@ -206,39 +202,35 @@ Walker::doNext(PacketPtr &read, PacketPtr &write) entry.patBit = bits(pte, 12); entry.vaddr = entry.vaddr & ~((2 * (1 << 20)) - 1); tlb->insert(entry.vaddr, entry); - nextState = Ready; - delete read->req; - delete read; - read = NULL; - return; + stop(); + return NoFault; } case PAEPTE: doWrite = !pte.a; pte.a = 1; entry.writable = entry.writable && pte.w; entry.user = entry.user && pte.u; - if (badNX) - panic("NX violation!\n"); - if (!pte.p) - panic("Page at %#x not present!\n", entry.vaddr); + if (badNX || !pte.p) { + stop(); + return pageFault(pte.p); + } entry.paddr = (uint64_t)pte & (mask(40) << 12); entry.uncacheable = uncacheable; entry.global = pte.g; entry.patBit = bits(pte, 7); entry.vaddr = entry.vaddr & ~((4 * (1 << 10)) - 1); tlb->insert(entry.vaddr, entry); - nextState = Ready; - delete read->req; - delete read; - read = NULL; - return; + stop(); + return NoFault; case PSEPD: doWrite = !pte.a; pte.a = 1; entry.writable = pte.w; entry.user = pte.u; - if (!pte.p) - panic("Page at %#x not present!\n", entry.vaddr); + if (!pte.p) { + stop(); + return pageFault(pte.p); + } if (!pte.ps) { // 4 KB page entry.size = 4 * (1 << 10); @@ -255,44 +247,40 @@ Walker::doNext(PacketPtr &read, PacketPtr &write) entry.patBit = bits(pte, 12); entry.vaddr = entry.vaddr & ~((4 * (1 << 20)) - 1); tlb->insert(entry.vaddr, entry); - nextState = Ready; - delete read->req; - delete read; - read = NULL; - return; + stop(); + return NoFault; } case PD: doWrite = !pte.a; pte.a = 1; entry.writable = pte.w; entry.user = pte.u; - if (!pte.p) - panic("Page at %#x not present!\n", entry.vaddr); + if (!pte.p) { + stop(); + return 
pageFault(pte.p); + } // 4 KB page entry.size = 4 * (1 << 10); nextRead = ((uint64_t)pte & (mask(20) << 12)) + vaddr.norml2 * size; nextState = PTE; break; - nextState = PTE; - break; case PTE: doWrite = !pte.a; pte.a = 1; entry.writable = pte.w; entry.user = pte.u; - if (!pte.p) - panic("Page at %#x not present!\n", entry.vaddr); + if (!pte.p) { + stop(); + return pageFault(pte.p); + } entry.paddr = (uint64_t)pte & (mask(20) << 12); entry.uncacheable = uncacheable; entry.global = pte.g; entry.patBit = bits(pte, 7); entry.vaddr = entry.vaddr & ~((4 * (1 << 10)) - 1); tlb->insert(entry.vaddr, entry); - nextState = Ready; - delete read->req; - delete read; - read = NULL; - return; + stop(); + return NoFault; default: panic("Unknown page table walker state %d!\n"); } @@ -316,16 +304,21 @@ Walker::doNext(PacketPtr &read, PacketPtr &write) delete oldRead->req; delete oldRead; } + return NoFault; } -void -Walker::start(ThreadContext * _tc, Addr vaddr, bool _write, bool _execute) +Fault +Walker::start(ThreadContext * _tc, BaseTLB::Translation *_translation, + RequestPtr _req, bool _write, bool _execute) { assert(state == Ready); assert(!tc); tc = _tc; + req = _req; + Addr vaddr = req->getVaddr(); execute = _execute; write = _write; + translation = _translation; VAddr addr = vaddr; @@ -339,6 +332,7 @@ Walker::start(ThreadContext * _tc, Addr vaddr, bool _write, bool _execute) // Do long mode. state = LongPML4; top = (cr3.longPdtb << 12) + addr.longl4 * size; + enableNX = efer.nxe; } else { // We're in some flavor of legacy mode. CR4 cr4 = tc->readMiscRegNoEffect(MISCREG_CR4); @@ -346,6 +340,7 @@ Walker::start(ThreadContext * _tc, Addr vaddr, bool _write, bool _execute) // Do legacy PAE. state = PAEPDP; top = (cr3.paePdtb << 5) + addr.pael3 * size; + enableNX = efer.nxe; } else { size = 4; top = (cr3.pdtb << 12) + addr.norml2 * size; @@ -356,14 +351,13 @@ Walker::start(ThreadContext * _tc, Addr vaddr, bool _write, bool _execute) // Do legacy non PSE. 
state = PD; } + enableNX = false; } } nextState = Ready; entry.vaddr = vaddr; - enableNX = efer.nxe; - Request::Flags flags = Request::PHYSICAL; if (cr3.pcd) flags.set(Request::UNCACHEABLE); @@ -372,13 +366,15 @@ Walker::start(ThreadContext * _tc, Addr vaddr, bool _write, bool _execute) read->allocate(); Enums::MemoryMode memMode = sys->getMemoryMode(); if (memMode == Enums::timing) { - tc->suspend(); + timingFault = NoFault; port.sendTiming(read); } else if (memMode == Enums::atomic) { + Fault fault; do { port.sendAtomic(read); PacketPtr write = NULL; - doNext(read, write); + fault = doNext(read, write); + assert(fault == NoFault || read == NULL); state = nextState; nextState = Ready; if (write) @@ -387,9 +383,11 @@ Walker::start(ThreadContext * _tc, Addr vaddr, bool _write, bool _execute) tc = NULL; state = Ready; nextState = Waiting; + return fault; } else { panic("Unrecognized memory system mode.\n"); } + return NoFault; } bool @@ -410,9 +408,10 @@ Walker::recvTiming(PacketPtr pkt) state = nextState; nextState = Ready; PacketPtr write = NULL; - doNext(pkt, write); + timingFault = doNext(pkt, write); state = Waiting; read = pkt; + assert(timingFault == NoFault || read == NULL); if (write) { writes.push_back(write); } @@ -421,10 +420,27 @@ Walker::recvTiming(PacketPtr pkt) sendPackets(); } if (inflight == 0 && read == NULL && writes.size() == 0) { - tc->activate(0); tc = NULL; state = Ready; nextState = Waiting; + if (timingFault == NoFault) { + /* + * Finish the translation. Now that we know the right entry is + * in the TLB, this should work with no memory accesses. + * There could be new faults unrelated to the table walk like + * permissions violations, so we'll need the return value as + * well. + */ + bool delayedResponse; + Fault fault = tlb->translate(req, tc, NULL, write, execute, + delayedResponse, true); + assert(!delayedResponse); + // Let the CPU continue. + translation->finish(fault, req, tc, write); + } else { + // There was a fault during the walk. 
Let the CPU know. + translation->finish(timingFault, req, tc, write); + } } } else if (pkt->wasNacked()) { pkt->reinitNacked(); @@ -525,6 +541,14 @@ Walker::getPort(const std::string &if_name, int idx) panic("No page table walker port named %s!\n", if_name); } +Fault +Walker::pageFault(bool present) +{ + HandyM5Reg m5reg = tc->readMiscRegNoEffect(MISCREG_M5_REG); + return new PageFault(entry.vaddr, present, write, + m5reg.cpl == 3, false, execute && enableNX); +} + } X86ISA::Walker * diff --git a/src/arch/x86/pagetable_walker.hh b/src/arch/x86/pagetable_walker.hh index de3f21195..992711acd 100644 --- a/src/arch/x86/pagetable_walker.hh +++ b/src/arch/x86/pagetable_walker.hh @@ -91,11 +91,22 @@ namespace X86ISA // if the machine is finished, or points to a packet to initiate // the next read. If any write is required to update an "accessed" // bit, write will point to a packet to do the write. Otherwise it - // will be NULL. - void doNext(PacketPtr &read, PacketPtr &write); + // will be NULL. The return value is whatever fault was incurred + // during this stage of the lookup. + Fault doNext(PacketPtr &read, PacketPtr &write); // Kick off the state machine. - void start(ThreadContext * _tc, Addr vaddr, bool write, bool execute); + Fault start(ThreadContext * _tc, BaseTLB::Translation *translation, + RequestPtr req, bool write, bool execute); + // Clean up after the state machine. + void + stop() + { + nextState = Ready; + delete read->req; + delete read; + read = NULL; + } protected: @@ -110,6 +121,11 @@ namespace X86ISA bool retrying; + /* + * The fault, if any, that's waiting to be delivered in timing mode. + */ + Fault timingFault; + /* * Functions for dealing with packets. */ @@ -156,16 +172,18 @@ namespace X86ISA // The TLB we're supposed to load. TLB * tlb; System * sys; + BaseTLB::Translation * translation; /* * State machine state. 
*/ ThreadContext * tc; + RequestPtr req; State state; State nextState; int size; bool enableNX; - bool write, execute; + bool write, execute, user; TlbEntry entry; Fault pageFault(bool present); diff --git a/src/arch/x86/tlb.cc b/src/arch/x86/tlb.cc index a34922b44..3962cd607 100644 --- a/src/arch/x86/tlb.cc +++ b/src/arch/x86/tlb.cc @@ -72,6 +72,9 @@ #if FULL_SYSTEM #include "arch/x86/pagetable_walker.hh" +#else +#include "mem/page_table.hh" +#include "sim/process.hh" #endif namespace X86ISA { @@ -90,7 +93,7 @@ TLB::TLB(const Params *p) : BaseTLB(p), configAddress(0), size(p->size) #endif } -void +TlbEntry * TLB::insert(Addr vpn, TlbEntry &entry) { //TODO Deal with conflicting entries @@ -106,6 +109,7 @@ TLB::insert(Addr vpn, TlbEntry &entry) *newEntry = entry; newEntry->vaddr = vpn; entryList.push_front(newEntry); + return newEntry; } TLB::EntryList::iterator @@ -138,14 +142,6 @@ TLB::lookup(Addr va, bool update_lru) return *entry; } -#if FULL_SYSTEM -void -TLB::walk(ThreadContext * _tc, Addr vaddr, bool write, bool execute) -{ - walker->start(_tc, vaddr, write, execute); -} -#endif - void TLB::invalidateAll() { @@ -188,11 +184,12 @@ TLB::demapPage(Addr va, uint64_t asn) } } -template Fault -TLB::translateAtomic(RequestPtr req, ThreadContext *tc, - bool write, bool execute) +TLB::translate(RequestPtr req, ThreadContext *tc, + Translation *translation, bool write, bool execute, + bool &delayedResponse, bool timing) { + delayedResponse = false; Addr vaddr = req->getVaddr(); DPRINTF(TLB, "Translating vaddr %#x.\n", vaddr); uint32_t flags = req->getFlags(); @@ -617,14 +614,45 @@ TLB::translateAtomic(RequestPtr req, ThreadContext *tc, // The vaddr already has the segment base applied. TlbEntry *entry = lookup(vaddr); if (!entry) { - return new TlbFault(vaddr, write, execute); - } else { - // Do paging protection checks. 
- DPRINTF(TLB, "Entry found with paddr %#x, doing protection checks.\n", entry->paddr); - Addr paddr = entry->paddr | (vaddr & (entry->size-1)); - DPRINTF(TLB, "Translated %#x -> %#x.\n", vaddr, paddr); - req->setPaddr(paddr); +#if FULL_SYSTEM + Fault fault = walker->start(tc, translation, req, + write, execute); + if (timing || fault != NoFault) { + // This gets ignored in atomic mode. + delayedResponse = true; + return fault; + } + entry = lookup(vaddr); + assert(entry); +#else + DPRINTF(TLB, "Handling a TLB miss for " + "address %#x at pc %#x.\n", + vaddr, tc->readPC()); + + Process *p = tc->getProcessPtr(); + TlbEntry newEntry; + bool success = p->pTable->lookup(vaddr, newEntry); + if(!success && !execute) { + p->checkAndAllocNextPage(vaddr); + success = p->pTable->lookup(vaddr, newEntry); + } + if(!success) { + panic("Tried to execute unmapped address %#x.\n", vaddr); + } else { + Addr alignedVaddr = p->pTable->pageAlign(vaddr); + DPRINTF(TLB, "Mapping %#x to %#x\n", alignedVaddr, + newEntry.pageStart()); + entry = insert(alignedVaddr, newEntry); + } + DPRINTF(TLB, "Miss was serviced.\n"); +#endif } + // Do paging protection checks. + DPRINTF(TLB, "Entry found with paddr %#x, " + "doing protection checks.\n", entry->paddr); + Addr paddr = entry->paddr | (vaddr & (entry->size-1)); + DPRINTF(TLB, "Translated %#x -> %#x.\n", vaddr, paddr); + req->setPaddr(paddr); } else { //Use the address which already has segmentation applied. 
DPRINTF(TLB, "Paging disabled.\n"); @@ -665,29 +693,41 @@ TLB::translateAtomic(RequestPtr req, ThreadContext *tc, Fault DTB::translateAtomic(RequestPtr req, ThreadContext *tc, bool write) { - return TLB::translateAtomic(req, tc, write, false); + bool delayedResponse; + return TLB::translate(req, tc, NULL, write, + false, delayedResponse, false); } void DTB::translateTiming(RequestPtr req, ThreadContext *tc, Translation *translation, bool write) { + bool delayedResponse; assert(translation); - translation->finish(translateAtomic(req, tc, write), req, tc, write); + Fault fault = TLB::translate(req, tc, translation, + write, false, delayedResponse, true); + if (!delayedResponse) + translation->finish(fault, req, tc, write); } Fault ITB::translateAtomic(RequestPtr req, ThreadContext *tc) { - return TLB::translateAtomic(req, tc, false, true); + bool delayedResponse; + return TLB::translate(req, tc, NULL, false, + true, delayedResponse, false); } void ITB::translateTiming(RequestPtr req, ThreadContext *tc, Translation *translation) { + bool delayedResponse; assert(translation); - translation->finish(translateAtomic(req, tc), req, tc, false); + Fault fault = TLB::translate(req, tc, translation, + false, true, delayedResponse, true); + if (!delayedResponse) + translation->finish(fault, req, tc, false); } #if FULL_SYSTEM diff --git a/src/arch/x86/tlb.hh b/src/arch/x86/tlb.hh index 56730983a..2467bc472 100644 --- a/src/arch/x86/tlb.hh +++ b/src/arch/x86/tlb.hh @@ -87,8 +87,7 @@ namespace X86ISA class TLB : public BaseTLB { protected: - friend class FakeITLBFault; - friend class FakeDTLBFault; + friend class Walker; typedef std::list EntryList; @@ -118,8 +117,6 @@ namespace X86ISA protected: Walker * walker; - - void walk(ThreadContext * _tc, Addr vaddr, bool write, bool execute); #endif public: @@ -137,15 +134,13 @@ namespace X86ISA EntryList freeList; EntryList entryList; - template - Fault translateAtomic(RequestPtr req, ThreadContext *tc, - bool write, bool execute); - 
void translateTiming(RequestPtr req, ThreadContext *tc, - Translation *translation, bool write, bool execute); + Fault translate(RequestPtr req, ThreadContext *tc, + Translation *translation, bool write, bool execute, + bool &delayedResponse, bool timing); public: - void insert(Addr vpn, TlbEntry &entry); + TlbEntry * insert(Addr vpn, TlbEntry &entry); // Checkpointing virtual void serialize(std::ostream &os); -- 2.30.2