src/gpu-compute/gpu_tlb.cc

   1 /*
   2  * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
   3  * All rights reserved.
   4  *
   5  * For use for simulation and test purposes only
   6  *
   7  * Redistribution and use in source and binary forms, with or without
   8  * modification, are permitted provided that the following conditions are met:
   9  *
  10  * 1. Redistributions of source code must retain the above copyright notice,
  11  * this list of conditions and the following disclaimer.
  12  *
  13  * 2. Redistributions in binary form must reproduce the above copyright notice,
  14  * this list of conditions and the following disclaimer in the documentation
  15  * and/or other materials provided with the distribution.
  16  *
  17  * 3. Neither the name of the copyright holder nor the names of its contributors
  18  * may be used to endorse or promote products derived from this software
  19  * without specific prior written permission.
  20  *
  21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
  25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  31  * POSSIBILITY OF SUCH DAMAGE.
  32  *
  33  * Author: Lisa Hsu
  34  */
  35
  36 #include "gpu-compute/gpu_tlb.hh"
  37
  38 #include <cmath>
  39 #include <cstring>
  40
  41 #include "arch/x86/faults.hh"
  42 #include "arch/x86/insts/microldstop.hh"
  43 #include "arch/x86/pagetable.hh"
  44 #include "arch/x86/pagetable_walker.hh"
  45 #include "arch/x86/regs/misc.hh"
  46 #include "arch/x86/regs/msr.hh"
  47 #include "arch/x86/x86_traits.hh"
  48 #include "base/bitfield.hh"
  49 #include "base/logging.hh"
  50 #include "base/output.hh"
  51 #include "base/trace.hh"
  52 #include "cpu/base.hh"
  53 #include "cpu/thread_context.hh"
  54 #include "debug/GPUPrefetch.hh"
  55 #include "debug/GPUTLB.hh"
  56 #include "mem/packet_access.hh"
  57 #include "mem/page_table.hh"
  58 #include "mem/request.hh"
  59 #include "sim/process.hh"
  60 #include "sim/pseudo_inst.hh"
  61
  62 namespace X86ISA
  63 {
  64
  65     GpuTLB::GpuTLB(const Params *p)
  66         : ClockedObject(p), configAddress(0), size(p->size),
  67           cleanupEvent([this]{ cleanup(); }, name(), false,
  68                        Event::Maximum_Pri),
  69           exitEvent([this]{ exitCallback(); }, name())
  70     {
  71         assoc = p->assoc;
  72         assert(assoc <= size);
  73         numSets = size/assoc;
  74         allocationPolicy = p->allocationPolicy;
  75         hasMemSidePort = false;
  76         accessDistance = p->accessDistance;
  77
  78         tlb.assign(size, TlbEntry());
  79
  80         freeList.resize(numSets);
  81         entryList.resize(numSets);
  82
  83         for (int set = 0; set < numSets; ++set) {
  84             for (int way = 0; way < assoc; ++way) {
  85                 int x = set * assoc + way;
  86                 freeList[set].push_back(&tlb.at(x));
  87             }
  88         }
  89
  90         FA = (size == assoc);
  91
  92         /**
  93          * @warning: the set-associative version assumes you have a
  94          * fixed page size of 4KB.
  95          * If the page size is greather than 4KB (as defined in the
  96          * TheISA::PageBytes), then there are various issues w/ the current
  97          * implementation (you'd have the same 8KB page being replicated in
  98          * different sets etc)
  99          */
 100         setMask = numSets - 1;
 101
 102         maxCoalescedReqs = p->maxOutstandingReqs;
 103
 104         // Do not allow maxCoalescedReqs to be more than the TLB associativity
 105         if (maxCoalescedReqs > assoc) {
 106             maxCoalescedReqs = assoc;
 107             cprintf("Forcing maxCoalescedReqs to %d (TLB assoc.) \n", assoc);
 108         }
 109
 110         outstandingReqs = 0;
 111         hitLatency = p->hitLatency;
 112         missLatency1 = p->missLatency1;
 113         missLatency2 = p->missLatency2;
 114
 115         // create the slave ports based on the number of connected ports
 116         for (size_t i = 0; i < p->port_slave_connection_count; ++i) {
 117             cpuSidePort.push_back(new CpuSidePort(csprintf("%s-port%d",
 118                                   name(), i), this, i));
 119         }
 120
 121         // create the master ports based on the number of connected ports
 122         for (size_t i = 0; i < p->port_master_connection_count; ++i) {
 123             memSidePort.push_back(new MemSidePort(csprintf("%s-port%d",
 124                                   name(), i), this, i));
 125         }
 126     }
 127
 128     // fixme: this is never called?
 129     GpuTLB::~GpuTLB()
 130     {
 131         // make sure all the hash-maps are empty
 132         assert(translationReturnEvent.empty());
 133     }
 134
 135     Port &
 136     GpuTLB::getPort(const std::string &if_name, PortID idx)
 137     {
 138         if (if_name == "slave") {
 139             if (idx >= static_cast<PortID>(cpuSidePort.size())) {
 140                 panic("TLBCoalescer::getPort: unknown index %d\n", idx);
 141             }
 142
 143             return *cpuSidePort[idx];
 144         } else if (if_name == "master") {
 145             if (idx >= static_cast<PortID>(memSidePort.size())) {
 146                 panic("TLBCoalescer::getPort: unknown index %d\n", idx);
 147             }
 148
 149             hasMemSidePort = true;
 150
 151             return *memSidePort[idx];
 152         } else {
 153             panic("TLBCoalescer::getPort: unknown port %s\n", if_name);
 154         }
 155     }
 156
 157     TlbEntry*
 158     GpuTLB::insert(Addr vpn, TlbEntry &entry)
 159     {
 160         TlbEntry *newEntry = nullptr;
 161
 162         /**
 163          * vpn holds the virtual page address
 164          * The least significant bits are simply masked
 165          */
 166         int set = (vpn >> TheISA::PageShift) & setMask;
 167
 168         if (!freeList[set].empty()) {
 169             newEntry = freeList[set].front();
 170             freeList[set].pop_front();
 171         } else {
 172             newEntry = entryList[set].back();
 173             entryList[set].pop_back();
 174         }
 175
 176         *newEntry = entry;
 177         newEntry->vaddr = vpn;
 178         entryList[set].push_front(newEntry);
 179
 180         return newEntry;
 181     }
 182
 183     GpuTLB::EntryList::iterator
 184     GpuTLB::lookupIt(Addr va, bool update_lru)
 185     {
 186         int set = (va >> TheISA::PageShift) & setMask;
 187
 188         if (FA) {
 189             assert(!set);
 190         }
 191
 192         auto entry = entryList[set].begin();
 193         for (; entry != entryList[set].end(); ++entry) {
 194             int page_size = (*entry)->size();
 195
 196             if ((*entry)->vaddr <= va && (*entry)->vaddr + page_size > va) {
 197                 DPRINTF(GPUTLB, "Matched vaddr %#x to entry starting at %#x "
 198                         "with size %#x.\n", va, (*entry)->vaddr, page_size);
 199
 200                 if (update_lru) {
 201                     entryList[set].push_front(*entry);
 202                     entryList[set].erase(entry);
 203                     entry = entryList[set].begin();
 204                 }
 205
 206                 break;
 207             }
 208         }
 209
 210         return entry;
 211     }
 212
 213     TlbEntry*
 214     GpuTLB::lookup(Addr va, bool update_lru)
 215     {
 216         int set = (va >> TheISA::PageShift) & setMask;
 217
 218         auto entry = lookupIt(va, update_lru);
 219
 220         if (entry == entryList[set].end())
 221             return nullptr;
 222         else
 223             return *entry;
 224     }
 225
 226     void
 227     GpuTLB::invalidateAll()
 228     {
 229         DPRINTF(GPUTLB, "Invalidating all entries.\n");
 230
 231         for (int i = 0; i < numSets; ++i) {
 232             while (!entryList[i].empty()) {
 233                 TlbEntry *entry = entryList[i].front();
 234                 entryList[i].pop_front();
 235                 freeList[i].push_back(entry);
 236             }
 237         }
 238     }
 239
 240     void
 241     GpuTLB::setConfigAddress(uint32_t addr)
 242     {
 243         configAddress = addr;
 244     }
 245
 246     void
 247     GpuTLB::invalidateNonGlobal()
 248     {
 249         DPRINTF(GPUTLB, "Invalidating all non global entries.\n");
 250
 251         for (int i = 0; i < numSets; ++i) {
 252             for (auto entryIt = entryList[i].begin();
 253                  entryIt != entryList[i].end();) {
 254                 if (!(*entryIt)->global) {
 255                     freeList[i].push_back(*entryIt);
 256                     entryList[i].erase(entryIt++);
 257                 } else {
 258                     ++entryIt;
 259                 }
 260             }
 261         }
 262     }
 263
 264     void
 265     GpuTLB::demapPage(Addr va, uint64_t asn)
 266     {
 267
 268         int set = (va >> TheISA::PageShift) & setMask;
 269         auto entry = lookupIt(va, false);
 270
 271         if (entry != entryList[set].end()) {
 272             freeList[set].push_back(*entry);
 273             entryList[set].erase(entry);
 274         }
 275     }
 276
 277
 278
 279     namespace
 280     {
 281
 282     Cycles
 283     localMiscRegAccess(bool read, MiscRegIndex regNum,
 284                        ThreadContext *tc, PacketPtr pkt)
 285     {
 286         if (read) {
 287             RegVal data = htole(tc->readMiscReg(regNum));
 288             // Make sure we don't trot off the end of data.
 289             pkt->setData((uint8_t *)&data);
 290         } else {
 291             RegVal data = htole(tc->readMiscRegNoEffect(regNum));
 292             tc->setMiscReg(regNum, letoh(data));
 293         }
 294         return Cycles(1);
 295     }
 296
 297     } // anonymous namespace
 298
 299     Fault
 300     GpuTLB::translateInt(bool read, const RequestPtr &req, ThreadContext *tc)
 301     {
 302         DPRINTF(GPUTLB, "Addresses references internal memory.\n");
 303         Addr vaddr = req->getVaddr();
 304         Addr prefix = (vaddr >> 3) & IntAddrPrefixMask;
 305
 306         if (prefix == IntAddrPrefixCPUID) {
 307             panic("CPUID memory space not yet implemented!\n");
 308         } else if (prefix == IntAddrPrefixMSR) {
 309             vaddr = (vaddr >> 3) & ~IntAddrPrefixMask;
 310
 311             MiscRegIndex regNum;
 312             if (!msrAddrToIndex(regNum, vaddr))
 313                 return std::make_shared<GeneralProtection>(0);
 314
 315             req->setLocalAccessor(
 316                 [read,regNum](ThreadContext *tc, PacketPtr pkt)
 317                 {
 318                     return localMiscRegAccess(read, regNum, tc, pkt);
 319                 }
 320             );
 321
 322             return NoFault;
 323         } else if (prefix == IntAddrPrefixIO) {
 324             // TODO If CPL > IOPL or in virtual mode, check the I/O permission
 325             // bitmap in the TSS.
 326
 327             Addr IOPort = vaddr & ~IntAddrPrefixMask;
 328             // Make sure the address fits in the expected 16 bit IO address
 329             // space.
 330             assert(!(IOPort & ~0xFFFF));
 331             if (IOPort == 0xCF8 && req->getSize() == 4) {
 332                 req->setLocalAccessor(
 333                     [read](ThreadContext *tc, PacketPtr pkt)
 334                     {
 335                         return localMiscRegAccess(
 336                                 read, MISCREG_PCI_CONFIG_ADDRESS, tc, pkt);
 337                     }
 338                 );
 339             } else if ((IOPort & ~mask(2)) == 0xCFC) {
 340                 req->setFlags(Request::UNCACHEABLE | Request::STRICT_ORDER);
 341                 Addr configAddress =
 342                     tc->readMiscRegNoEffect(MISCREG_PCI_CONFIG_ADDRESS);
 343                 if (bits(configAddress, 31, 31)) {
 344                     req->setPaddr(PhysAddrPrefixPciConfig |
 345                             mbits(configAddress, 30, 2) |
 346                             (IOPort & mask(2)));
 347                 } else {
 348                     req->setPaddr(PhysAddrPrefixIO | IOPort);
 349                 }
 350             } else {
 351                 req->setFlags(Request::UNCACHEABLE | Request::STRICT_ORDER);
 352                 req->setPaddr(PhysAddrPrefixIO | IOPort);
 353             }
 354             return NoFault;
 355         } else {
 356             panic("Access to unrecognized internal address space %#x.\n",
 357                   prefix);
 358         }
 359     }
 360
 361     /**
 362      * TLB_lookup will only perform a TLB lookup returning true on a TLB hit
 363      * and false on a TLB miss.
 364      * Many of the checks about different modes have been converted to
 365      * assertions, since these parts of the code are not really used.
 366      * On a hit it will update the LRU stack.
 367      */
 368     bool
 369     GpuTLB::tlbLookup(const RequestPtr &req,
 370                       ThreadContext *tc, bool update_stats)
 371     {
 372         bool tlb_hit = false;
 373     #ifndef NDEBUG
 374         uint32_t flags = req->getFlags();
 375         int seg = flags & SegmentFlagMask;
 376     #endif
 377
 378         assert(seg != SEGMENT_REG_MS);
 379         Addr vaddr = req->getVaddr();
 380         DPRINTF(GPUTLB, "TLB Lookup for vaddr %#x.\n", vaddr);
 381         HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
 382
 383         if (m5Reg.prot) {
 384             DPRINTF(GPUTLB, "In protected mode.\n");
 385             // make sure we are in 64-bit mode
 386             assert(m5Reg.mode == LongMode);
 387
 388             // If paging is enabled, do the translation.
 389             if (m5Reg.paging) {
 390                 DPRINTF(GPUTLB, "Paging enabled.\n");
 391                 //update LRU stack on a hit
 392                 TlbEntry *entry = lookup(vaddr, true);
 393
 394                 if (entry)
 395                     tlb_hit = true;
 396
 397                 if (!update_stats) {
 398                     // functional tlb access for memory initialization
 399                     // i.e., memory seeding or instr. seeding -> don't update
 400                     // TLB and stats
 401                     return tlb_hit;
 402                 }
 403
 404                 localNumTLBAccesses++;
 405
 406                 if (!entry) {
 407                     localNumTLBMisses++;
 408                 } else {
 409                     localNumTLBHits++;
 410                 }
 411             }
 412         }
 413
 414         return tlb_hit;
 415     }
 416
 417     Fault
 418     GpuTLB::translate(const RequestPtr &req, ThreadContext *tc,
 419                       Translation *translation, Mode mode,
 420                       bool &delayedResponse, bool timing, int &latency)
 421     {
 422         uint32_t flags = req->getFlags();
 423         int seg = flags & SegmentFlagMask;
 424         bool storeCheck = flags & (StoreCheck << FlagShift);
 425
 426         // If this is true, we're dealing with a request
 427         // to a non-memory address space.
 428         if (seg == SEGMENT_REG_MS) {
 429             return translateInt(mode == Mode::Read, req, tc);
 430         }
 431
 432         delayedResponse = false;
 433         Addr vaddr = req->getVaddr();
 434         DPRINTF(GPUTLB, "Translating vaddr %#x.\n", vaddr);
 435
 436         HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
 437
 438         // If protected mode has been enabled...
 439         if (m5Reg.prot) {
 440             DPRINTF(GPUTLB, "In protected mode.\n");
 441             // If we're not in 64-bit mode, do protection/limit checks
 442             if (m5Reg.mode != LongMode) {
 443                 DPRINTF(GPUTLB, "Not in long mode. Checking segment "
 444                         "protection.\n");
 445
 446                 // Check for a null segment selector.
 447                 if (!(seg == SEGMENT_REG_TSG || seg == SYS_SEGMENT_REG_IDTR ||
 448                     seg == SEGMENT_REG_HS || seg == SEGMENT_REG_LS)
 449                     && !tc->readMiscRegNoEffect(MISCREG_SEG_SEL(seg))) {
 450                     return std::make_shared<GeneralProtection>(0);
 451                 }
 452
 453                 bool expandDown = false;
 454                 SegAttr attr = tc->readMiscRegNoEffect(MISCREG_SEG_ATTR(seg));
 455
 456                 if (seg >= SEGMENT_REG_ES && seg <= SEGMENT_REG_HS) {
 457                     if (!attr.writable && (mode == BaseTLB::Write ||
 458                         storeCheck))
 459                         return std::make_shared<GeneralProtection>(0);
 460
 461                     if (!attr.readable && mode == BaseTLB::Read)
 462                         return std::make_shared<GeneralProtection>(0);
 463
 464                     expandDown = attr.expandDown;
 465
 466                 }
 467
 468                 Addr base = tc->readMiscRegNoEffect(MISCREG_SEG_BASE(seg));
 469                 Addr limit = tc->readMiscRegNoEffect(MISCREG_SEG_LIMIT(seg));
 470                 // This assumes we're not in 64 bit mode. If we were, the
 471                 // default address size is 64 bits, overridable to 32.
 472                 int size = 32;
 473                 bool sizeOverride = (flags & (AddrSizeFlagBit << FlagShift));
 474                 SegAttr csAttr = tc->readMiscRegNoEffect(MISCREG_CS_ATTR);
 475
 476                 if ((csAttr.defaultSize && sizeOverride) ||
 477                     (!csAttr.defaultSize && !sizeOverride)) {
 478                     size = 16;
 479                 }
 480
 481                 Addr offset = bits(vaddr - base, size - 1, 0);
 482                 Addr endOffset = offset + req->getSize() - 1;
 483
 484                 if (expandDown) {
 485                     DPRINTF(GPUTLB, "Checking an expand down segment.\n");
 486                     warn_once("Expand down segments are untested.\n");
 487
 488                     if (offset <= limit || endOffset <= limit)
 489                         return std::make_shared<GeneralProtection>(0);
 490                 } else {
 491                     if (offset > limit || endOffset > limit)
 492                         return std::make_shared<GeneralProtection>(0);
 493                 }
 494             }
 495
 496             // If paging is enabled, do the translation.
 497             if (m5Reg.paging) {
 498                 DPRINTF(GPUTLB, "Paging enabled.\n");
 499                 // The vaddr already has the segment base applied.
 500                 TlbEntry *entry = lookup(vaddr);
 501                 localNumTLBAccesses++;
 502
 503                 if (!entry) {
 504                     localNumTLBMisses++;
 505                     if (timing) {
 506                         latency = missLatency1;
 507                     }
 508
 509                     if (FullSystem) {
 510                         fatal("GpuTLB doesn't support full-system mode\n");
 511                     } else {
 512                         DPRINTF(GPUTLB, "Handling a TLB miss for address %#x "
 513                                 "at pc %#x.\n", vaddr, tc->instAddr());
 514
 515                         Process *p = tc->getProcessPtr();
 516                         const EmulationPageTable::Entry *pte =
 517                             p->pTable->lookup(vaddr);
 518
 519                         if (!pte && mode != BaseTLB::Execute) {
 520                             // penalize a "page fault" more
 521                             if (timing)
 522                                 latency += missLatency2;
 523
 524                             if (p->fixupFault(vaddr))
 525                                 pte = p->pTable->lookup(vaddr);
 526                         }
 527
 528                         if (!pte) {
 529                             return std::make_shared<PageFault>(vaddr, true,
 530                                                                mode, true,
 531                                                                false);
 532                         } else {
 533                             Addr alignedVaddr = p->pTable->pageAlign(vaddr);
 534
 535                             DPRINTF(GPUTLB, "Mapping %#x to %#x\n",
 536                                     alignedVaddr, pte->paddr);
 537
 538                             TlbEntry gpuEntry(p->pid(), alignedVaddr,
 539                                               pte->paddr, false, false);
 540                             entry = insert(alignedVaddr, gpuEntry);
 541                         }
 542
 543                         DPRINTF(GPUTLB, "Miss was serviced.\n");
 544                     }
 545                 } else {
 546                     localNumTLBHits++;
 547
 548                     if (timing) {
 549                         latency = hitLatency;
 550                     }
 551                 }
 552
 553                 // Do paging protection checks.
 554                 bool inUser = (m5Reg.cpl == 3 &&
 555                                !(flags & (CPL0FlagBit << FlagShift)));
 556
 557                 CR0 cr0 = tc->readMiscRegNoEffect(MISCREG_CR0);
 558                 bool badWrite = (!entry->writable && (inUser || cr0.wp));
 559
 560                 if ((inUser && !entry->user) || (mode == BaseTLB::Write &&
 561                      badWrite)) {
 562                     // The page must have been present to get into the TLB in
 563                     // the first place. We'll assume the reserved bits are
 564                     // fine even though we're not checking them.
 565                     return std::make_shared<PageFault>(vaddr, true, mode,
 566                                                        inUser, false);
 567                 }
 568
 569                 if (storeCheck && badWrite) {
 570                     // This would fault if this were a write, so return a page
 571                     // fault that reflects that happening.
 572                     return std::make_shared<PageFault>(vaddr, true,
 573                                                        BaseTLB::Write,
 574                                                        inUser, false);
 575                 }
 576
 577
 578                 DPRINTF(GPUTLB, "Entry found with paddr %#x, doing protection "
 579                         "checks.\n", entry->paddr);
 580
 581                 int page_size = entry->size();
 582                 Addr paddr = entry->paddr | (vaddr & (page_size - 1));
 583                 DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);
 584                 req->setPaddr(paddr);
 585
 586                 if (entry->uncacheable)
 587                     req->setFlags(Request::UNCACHEABLE);
 588             } else {
 589                 //Use the address which already has segmentation applied.
 590                 DPRINTF(GPUTLB, "Paging disabled.\n");
 591                 DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, vaddr);
 592                 req->setPaddr(vaddr);
 593             }
 594         } else {
 595             // Real mode
 596             DPRINTF(GPUTLB, "In real mode.\n");
 597             DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, vaddr);
 598             req->setPaddr(vaddr);
 599         }
 600
 601         // Check for an access to the local APIC
 602         if (FullSystem) {
 603             LocalApicBase localApicBase =
 604                 tc->readMiscRegNoEffect(MISCREG_APIC_BASE);
 605
 606             Addr baseAddr = localApicBase.base * PageBytes;
 607             Addr paddr = req->getPaddr();
 608
 609             if (baseAddr <= paddr && baseAddr + PageBytes > paddr) {
 610                 // Force the access to be uncacheable.
 611                 req->setFlags(Request::UNCACHEABLE);
 612                 req->setPaddr(x86LocalAPICAddress(tc->contextId(),
 613                                                   paddr - baseAddr));
 614             }
 615         }
 616
 617         return NoFault;
 618     };
 619
 620     Fault
 621     GpuTLB::translateAtomic(const RequestPtr &req, ThreadContext *tc,
 622                             Mode mode, int &latency)
 623     {
 624         bool delayedResponse;
 625
 626         return GpuTLB::translate(req, tc, nullptr, mode, delayedResponse,
 627             false, latency);
 628     }
 629
 630     void
 631     GpuTLB::translateTiming(const RequestPtr &req, ThreadContext *tc,
 632             Translation *translation, Mode mode, int &latency)
 633     {
 634         bool delayedResponse;
 635         assert(translation);
 636
 637         Fault fault = GpuTLB::translate(req, tc, translation, mode,
 638                                         delayedResponse, true, latency);
 639
 640         if (!delayedResponse)
 641             translation->finish(fault, req, tc, mode);
 642     }
 643
 644     Walker*
 645     GpuTLB::getWalker()
 646     {
 647         return walker;
 648     }
 649
 650
 651     void
 652     GpuTLB::serialize(CheckpointOut &cp) const
 653     {
 654     }
 655
 656     void
 657     GpuTLB::unserialize(CheckpointIn &cp)
 658     {
 659     }
 660
 661     void
 662     GpuTLB::regStats()
 663     {
 664         ClockedObject::regStats();
 665
 666         localNumTLBAccesses
 667             .name(name() + ".local_TLB_accesses")
 668             .desc("Number of TLB accesses")
 669             ;
 670
 671         localNumTLBHits
 672             .name(name() + ".local_TLB_hits")
 673             .desc("Number of TLB hits")
 674             ;
 675
 676         localNumTLBMisses
 677             .name(name() + ".local_TLB_misses")
 678             .desc("Number of TLB misses")
 679             ;
 680
 681         localTLBMissRate
 682             .name(name() + ".local_TLB_miss_rate")
 683             .desc("TLB miss rate")
 684             ;
 685
 686         accessCycles
 687             .name(name() + ".access_cycles")
 688             .desc("Cycles spent accessing this TLB level")
 689             ;
 690
 691         pageTableCycles
 692             .name(name() + ".page_table_cycles")
 693             .desc("Cycles spent accessing the page table")
 694             ;
 695
 696         localTLBMissRate = 100 * localNumTLBMisses / localNumTLBAccesses;
 697
 698         numUniquePages
 699             .name(name() + ".unique_pages")
 700             .desc("Number of unique pages touched")
 701             ;
 702
 703         localCycles
 704             .name(name() + ".local_cycles")
 705             .desc("Number of cycles spent in queue for all incoming reqs")
 706             ;
 707
 708         localLatency
 709             .name(name() + ".local_latency")
 710             .desc("Avg. latency over incoming coalesced reqs")
 711             ;
 712
 713         localLatency = localCycles / localNumTLBAccesses;
 714
 715         globalNumTLBAccesses
 716             .name(name() + ".global_TLB_accesses")
 717             .desc("Number of TLB accesses")
 718             ;
 719
 720         globalNumTLBHits
 721             .name(name() + ".global_TLB_hits")
 722             .desc("Number of TLB hits")
 723             ;
 724
 725         globalNumTLBMisses
 726             .name(name() + ".global_TLB_misses")
 727             .desc("Number of TLB misses")
 728             ;
 729
 730         globalTLBMissRate
 731             .name(name() + ".global_TLB_miss_rate")
 732             .desc("TLB miss rate")
 733             ;
 734
 735         globalTLBMissRate = 100 * globalNumTLBMisses / globalNumTLBAccesses;
 736
 737         avgReuseDistance
 738             .name(name() + ".avg_reuse_distance")
 739             .desc("avg. reuse distance over all pages (in ticks)")
 740             ;
 741
 742     }
 743
 744     /**
 745      * Do the TLB lookup for this coalesced request and schedule
 746      * another event <TLB access latency> cycles later.
 747      */
 748
 749     void
 750     GpuTLB::issueTLBLookup(PacketPtr pkt)
 751     {
 752         assert(pkt);
 753         assert(pkt->senderState);
 754
 755         Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
 756                                         TheISA::PageBytes);
 757
 758         TranslationState *sender_state =
 759                 safe_cast<TranslationState*>(pkt->senderState);
 760
 761         bool update_stats = !sender_state->prefetch;
 762         ThreadContext * tmp_tc = sender_state->tc;
 763
 764         DPRINTF(GPUTLB, "Translation req. for virt. page addr %#x\n",
 765                 virt_page_addr);
 766
 767         int req_cnt = sender_state->reqCnt.back();
 768
 769         if (update_stats) {
 770             accessCycles -= (curTick() * req_cnt);
 771             localCycles -= curTick();
 772             updatePageFootprint(virt_page_addr);
 773             globalNumTLBAccesses += req_cnt;
 774         }
 775
 776         tlbOutcome lookup_outcome = TLB_MISS;
 777         const RequestPtr &tmp_req = pkt->req;
 778
 779         // Access the TLB and figure out if it's a hit or a miss.
 780         bool success = tlbLookup(tmp_req, tmp_tc, update_stats);
 781
 782         if (success) {
 783             lookup_outcome = TLB_HIT;
 784             // Put the entry in SenderState
 785             TlbEntry *entry = lookup(tmp_req->getVaddr(), false);
 786             assert(entry);
 787
 788             auto p = sender_state->tc->getProcessPtr();
 789             sender_state->tlbEntry =
 790                 new TlbEntry(p->pid(), entry->vaddr, entry->paddr,
 791                              false, false);
 792
 793             if (update_stats) {
 794                 // the reqCnt has an entry per level, so its size tells us
 795                 // which level we are in
 796                 sender_state->hitLevel = sender_state->reqCnt.size();
 797                 globalNumTLBHits += req_cnt;
 798             }
 799         } else {
 800             if (update_stats)
 801                 globalNumTLBMisses += req_cnt;
 802         }
 803
 804         /*
 805          * We now know the TLB lookup outcome (if it's a hit or a miss), as
 806          * well as the TLB access latency.
 807          *
 808          * We create and schedule a new TLBEvent which will help us take the
 809          * appropriate actions (e.g., update TLB on a hit, send request to
 810          * lower level TLB on a miss, or start a page walk if this was the
 811          * last-level TLB)
 812          */
 813         TLBEvent *tlb_event =
 814             new TLBEvent(this, virt_page_addr, lookup_outcome, pkt);
 815
 816         if (translationReturnEvent.count(virt_page_addr)) {
 817             panic("Virtual Page Address %#x already has a return event\n",
 818                   virt_page_addr);
 819         }
 820
 821         translationReturnEvent[virt_page_addr] = tlb_event;
 822         assert(tlb_event);
 823
 824         DPRINTF(GPUTLB, "schedule translationReturnEvent @ curTick %d\n",
 825                 curTick() + cyclesToTicks(Cycles(hitLatency)));
 826
 827         schedule(tlb_event, curTick() + cyclesToTicks(Cycles(hitLatency)));
 828     }
 829
 830     GpuTLB::TLBEvent::TLBEvent(GpuTLB* _tlb, Addr _addr,
 831         tlbOutcome tlb_outcome, PacketPtr _pkt)
 832             : Event(CPU_Tick_Pri), tlb(_tlb), virtPageAddr(_addr),
 833               outcome(tlb_outcome), pkt(_pkt)
 834     {
 835     }
 836
 837     /**
 838      * Do Paging protection checks. If we encounter a page fault, then
 839      * an assertion is fired.
 840      */
 841     void
 842     GpuTLB::pagingProtectionChecks(ThreadContext *tc, PacketPtr pkt,
 843             TlbEntry * tlb_entry, Mode mode)
 844     {
 845         HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
 846         uint32_t flags = pkt->req->getFlags();
 847         bool storeCheck = flags & (StoreCheck << FlagShift);
 848
 849         // Do paging protection checks.
 850         bool inUser
 851             = (m5Reg.cpl == 3 && !(flags & (CPL0FlagBit << FlagShift)));
 852         CR0 cr0 = tc->readMiscRegNoEffect(MISCREG_CR0);
 853
 854         bool badWrite = (!tlb_entry->writable && (inUser || cr0.wp));
 855
 856         if ((inUser && !tlb_entry->user) ||
 857             (mode == BaseTLB::Write && badWrite)) {
 858             // The page must have been present to get into the TLB in
 859             // the first place. We'll assume the reserved bits are
 860             // fine even though we're not checking them.
 861             panic("Page fault detected");
 862         }
 863
 864         if (storeCheck && badWrite) {
 865             // This would fault if this were a write, so return a page
 866             // fault that reflects that happening.
 867             panic("Page fault detected");
 868         }
 869     }
 870
 871     /**
 872      * handleTranslationReturn is called on a TLB hit,
 873      * when a TLB miss returns or when a page fault returns.
 874      * The latter calls handelHit with TLB miss as tlbOutcome.
 875      */
 876     void
 877     GpuTLB::handleTranslationReturn(Addr virt_page_addr,
 878         tlbOutcome tlb_outcome, PacketPtr pkt)
 879     {
 880         assert(pkt);
 881         Addr vaddr = pkt->req->getVaddr();
 882
 883         TranslationState *sender_state =
 884             safe_cast<TranslationState*>(pkt->senderState);
 885
 886         ThreadContext *tc = sender_state->tc;
 887         Mode mode = sender_state->tlbMode;
 888
 889         TlbEntry *local_entry, *new_entry;
 890
 891         if (tlb_outcome == TLB_HIT) {
 892             DPRINTF(GPUTLB, "Translation Done - TLB Hit for addr %#x\n",
 893                 vaddr);
 894             local_entry = sender_state->tlbEntry;
 895         } else {
 896             DPRINTF(GPUTLB, "Translation Done - TLB Miss for addr %#x\n",
 897                     vaddr);
 898
 899             /**
 900              * We are returning either from a page walk or from a hit at a
 901              * lower TLB level. The senderState should be "carrying" a pointer
 902              * to the correct TLBEntry.
 903              */
 904             new_entry = sender_state->tlbEntry;
 905             assert(new_entry);
 906             local_entry = new_entry;
 907
 908             if (allocationPolicy) {
 909                 DPRINTF(GPUTLB, "allocating entry w/ addr %#x\n",
 910                         virt_page_addr);
 911
 912                 local_entry = insert(virt_page_addr, *new_entry);
 913             }
 914
 915             assert(local_entry);
 916         }
 917
 918         /**
 919          * At this point the packet carries an up-to-date tlbEntry pointer
 920          * in its senderState.
 921          * Next step is to do the paging protection checks.
 922          */
 923         DPRINTF(GPUTLB, "Entry found with vaddr %#x,  doing protection checks "
 924                 "while paddr was %#x.\n", local_entry->vaddr,
 925                 local_entry->paddr);
 926
 927         pagingProtectionChecks(tc, pkt, local_entry, mode);
 928         int page_size = local_entry->size();
 929         Addr paddr = local_entry->paddr | (vaddr & (page_size - 1));
 930         DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);
 931
 932         // Since this packet will be sent through the cpu side slave port,
 933         // it must be converted to a response pkt if it is not one already
 934         if (pkt->isRequest()) {
 935             pkt->makeTimingResponse();
 936         }
 937
 938         pkt->req->setPaddr(paddr);
 939
 940         if (local_entry->uncacheable) {
 941              pkt->req->setFlags(Request::UNCACHEABLE);
 942         }
 943
 944         //send packet back to coalescer
 945         cpuSidePort[0]->sendTimingResp(pkt);
 946         //schedule cleanup event
 947         cleanupQueue.push(virt_page_addr);
 948
 949         // schedule this only once per cycle.
 950         // The check is required because we might have multiple translations
 951         // returning the same cycle
 952         // this is a maximum priority event and must be on the same cycle
 953         // as the cleanup event in TLBCoalescer to avoid a race with
 954         // IssueProbeEvent caused by TLBCoalescer::MemSidePort::recvReqRetry
 955         if (!cleanupEvent.scheduled())
 956             schedule(cleanupEvent, curTick());
 957     }
 958
 959     /**
 960      * Here we take the appropriate actions based on the result of the
 961      * TLB lookup.
 962      */
 963     void
 964     GpuTLB::translationReturn(Addr virtPageAddr, tlbOutcome outcome,
 965                               PacketPtr pkt)
 966     {
 967         DPRINTF(GPUTLB, "Triggered TLBEvent for addr %#x\n", virtPageAddr);
 968
 969         assert(translationReturnEvent[virtPageAddr]);
 970         assert(pkt);
 971
 972         TranslationState *tmp_sender_state =
 973             safe_cast<TranslationState*>(pkt->senderState);
 974
 975         int req_cnt = tmp_sender_state->reqCnt.back();
 976         bool update_stats = !tmp_sender_state->prefetch;
 977
 978
 979         if (outcome == TLB_HIT) {
 980             handleTranslationReturn(virtPageAddr, TLB_HIT, pkt);
 981
 982             if (update_stats) {
 983                 accessCycles += (req_cnt * curTick());
 984                 localCycles += curTick();
 985             }
 986
 987         } else if (outcome == TLB_MISS) {
 988
 989             DPRINTF(GPUTLB, "This is a TLB miss\n");
 990             if (update_stats) {
 991                 accessCycles += (req_cnt*curTick());
 992                 localCycles += curTick();
 993             }
 994
 995             if (hasMemSidePort) {
 996                 // the one cyle added here represent the delay from when we get
 997                 // the reply back till when we propagate it to the coalescer
 998                 // above.
 999                 if (update_stats) {
1000                     accessCycles += (req_cnt * 1);
1001                     localCycles += 1;
1002                 }
1003
1004                 /**
1005                  * There is a TLB below. Send the coalesced request.
1006                  * We actually send the very first packet of all the
1007                  * pending packets for this virtual page address.
1008                  */
1009                 if (!memSidePort[0]->sendTimingReq(pkt)) {
1010                     DPRINTF(GPUTLB, "Failed sending translation request to "
1011                             "lower level TLB for addr %#x\n", virtPageAddr);
1012
1013                     memSidePort[0]->retries.push_back(pkt);
1014                 } else {
1015                     DPRINTF(GPUTLB, "Sent translation request to lower level "
1016                             "TLB for addr %#x\n", virtPageAddr);
1017                 }
1018             } else {
1019                 //this is the last level TLB. Start a page walk
1020                 DPRINTF(GPUTLB, "Last level TLB - start a page walk for "
1021                         "addr %#x\n", virtPageAddr);
1022
1023                 if (update_stats)
1024                     pageTableCycles -= (req_cnt*curTick());
1025
1026                 TLBEvent *tlb_event = translationReturnEvent[virtPageAddr];
1027                 assert(tlb_event);
1028                 tlb_event->updateOutcome(PAGE_WALK);
1029                 schedule(tlb_event,
1030                          curTick() + cyclesToTicks(Cycles(missLatency2)));
1031             }
1032         } else if (outcome == PAGE_WALK) {
1033             if (update_stats)
1034                 pageTableCycles += (req_cnt*curTick());
1035
1036             // Need to access the page table and update the TLB
1037             DPRINTF(GPUTLB, "Doing a page walk for address %#x\n",
1038                     virtPageAddr);
1039
1040             TranslationState *sender_state =
1041                 safe_cast<TranslationState*>(pkt->senderState);
1042
1043             Process *p = sender_state->tc->getProcessPtr();
1044             Addr vaddr = pkt->req->getVaddr();
1045     #ifndef NDEBUG
1046             Addr alignedVaddr = p->pTable->pageAlign(vaddr);
1047             assert(alignedVaddr == virtPageAddr);
1048     #endif
1049             const EmulationPageTable::Entry *pte = p->pTable->lookup(vaddr);
1050             if (!pte && sender_state->tlbMode != BaseTLB::Execute &&
1051                     p->fixupFault(vaddr)) {
1052                 pte = p->pTable->lookup(vaddr);
1053             }
1054
1055             if (pte) {
1056                 DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
1057                         pte->paddr);
1058
1059                 sender_state->tlbEntry =
1060                     new TlbEntry(p->pid(), virtPageAddr, pte->paddr, false,
1061                                  false);
1062             } else {
1063                 sender_state->tlbEntry = nullptr;
1064             }
1065
1066             handleTranslationReturn(virtPageAddr, TLB_MISS, pkt);
1067         } else if (outcome == MISS_RETURN) {
1068             /** we add an extra cycle in the return path of the translation
1069              * requests in between the various TLB levels.
1070              */
1071             handleTranslationReturn(virtPageAddr, TLB_MISS, pkt);
1072         } else {
1073             panic("Unexpected TLB outcome %d", outcome);
1074         }
1075     }
1076
1077     void
1078     GpuTLB::TLBEvent::process()
1079     {
1080         tlb->translationReturn(virtPageAddr, outcome, pkt);
1081     }
1082
1083     const char*
1084     GpuTLB::TLBEvent::description() const
1085     {
1086         return "trigger translationDoneEvent";
1087     }
1088
1089     void
1090     GpuTLB::TLBEvent::updateOutcome(tlbOutcome _outcome)
1091     {
1092         outcome = _outcome;
1093     }
1094
1095     Addr
1096     GpuTLB::TLBEvent::getTLBEventVaddr()
1097     {
1098         return virtPageAddr;
1099     }
1100
1101     /**
1102      * recvTiming receives a coalesced timing request from a TLBCoalescer
1103      * and it calls issueTLBLookup()
1104      * It only rejects the packet if we have exceeded the max
1105      * outstanding number of requests for the TLB
1106      */
1107     bool
1108     GpuTLB::CpuSidePort::recvTimingReq(PacketPtr pkt)
1109     {
1110         if (tlb->outstandingReqs < tlb->maxCoalescedReqs) {
1111             tlb->issueTLBLookup(pkt);
1112             // update number of outstanding translation requests
1113             tlb->outstandingReqs++;
1114             return true;
1115          } else {
1116             DPRINTF(GPUTLB, "Reached maxCoalescedReqs number %d\n",
1117                     tlb->outstandingReqs);
1118             return false;
1119          }
1120     }
1121
1122     /**
1123      * handleFuncTranslationReturn is called on a TLB hit,
1124      * when a TLB miss returns or when a page fault returns.
1125      * It updates LRU, inserts the TLB entry on a miss
1126      * depending on the allocation policy and does the required
1127      * protection checks. It does NOT create a new packet to
1128      * update the packet's addr; this is done in hsail-gpu code.
1129      */
1130     void
1131     GpuTLB::handleFuncTranslationReturn(PacketPtr pkt, tlbOutcome tlb_outcome)
1132     {
1133         TranslationState *sender_state =
1134             safe_cast<TranslationState*>(pkt->senderState);
1135
1136         ThreadContext *tc = sender_state->tc;
1137         Mode mode = sender_state->tlbMode;
1138         Addr vaddr = pkt->req->getVaddr();
1139
1140         TlbEntry *local_entry, *new_entry;
1141
1142         if (tlb_outcome == TLB_HIT) {
1143             DPRINTF(GPUTLB, "Functional Translation Done - TLB hit for addr "
1144                     "%#x\n", vaddr);
1145
1146             local_entry = sender_state->tlbEntry;
1147         } else {
1148             DPRINTF(GPUTLB, "Functional Translation Done - TLB miss for addr "
1149                     "%#x\n", vaddr);
1150
1151             /**
1152              * We are returning either from a page walk or from a hit at a
1153              * lower TLB level. The senderState should be "carrying" a pointer
1154              * to the correct TLBEntry.
1155              */
1156             new_entry = sender_state->tlbEntry;
1157             assert(new_entry);
1158             local_entry = new_entry;
1159
1160             if (allocationPolicy) {
1161                 Addr virt_page_addr = roundDown(vaddr, TheISA::PageBytes);
1162
1163                 DPRINTF(GPUTLB, "allocating entry w/ addr %#x\n",
1164                         virt_page_addr);
1165
1166                 local_entry = insert(virt_page_addr, *new_entry);
1167             }
1168
1169             assert(local_entry);
1170         }
1171
1172         DPRINTF(GPUTLB, "Entry found with vaddr %#x, doing protection checks "
1173                 "while paddr was %#x.\n", local_entry->vaddr,
1174                 local_entry->paddr);
1175
1176         /**
1177          * Do paging checks if it's a normal functional access.  If it's for a
1178          * prefetch, then sometimes you can try to prefetch something that
1179          * won't pass protection. We don't actually want to fault becuase there
1180          * is no demand access to deem this a violation.  Just put it in the
1181          * TLB and it will fault if indeed a future demand access touches it in
1182          * violation.
1183          *
1184          * This feature could be used to explore security issues around
1185          * speculative memory accesses.
1186          */
1187         if (!sender_state->prefetch && sender_state->tlbEntry)
1188             pagingProtectionChecks(tc, pkt, local_entry, mode);
1189
1190         int page_size = local_entry->size();
1191         Addr paddr = local_entry->paddr | (vaddr & (page_size - 1));
1192         DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);
1193
1194         pkt->req->setPaddr(paddr);
1195
1196         if (local_entry->uncacheable)
1197              pkt->req->setFlags(Request::UNCACHEABLE);
1198     }
1199
1200     // This is used for atomic translations. Need to
1201     // make it all happen during the same cycle.
1202     void
1203     GpuTLB::CpuSidePort::recvFunctional(PacketPtr pkt)
1204     {
1205         TranslationState *sender_state =
1206             safe_cast<TranslationState*>(pkt->senderState);
1207
1208         ThreadContext *tc = sender_state->tc;
1209         bool update_stats = !sender_state->prefetch;
1210
1211         Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
1212                                         TheISA::PageBytes);
1213
1214         if (update_stats)
1215             tlb->updatePageFootprint(virt_page_addr);
1216
1217         // do the TLB lookup without updating the stats
1218         bool success = tlb->tlbLookup(pkt->req, tc, update_stats);
1219         tlbOutcome tlb_outcome = success ? TLB_HIT : TLB_MISS;
1220
1221         // functional mode means no coalescing
1222         // global metrics are the same as the local metrics
1223         if (update_stats) {
1224             tlb->globalNumTLBAccesses++;
1225
1226             if (success) {
1227                 sender_state->hitLevel = sender_state->reqCnt.size();
1228                 tlb->globalNumTLBHits++;
1229             }
1230         }
1231
1232         if (!success) {
1233             if (update_stats)
1234                 tlb->globalNumTLBMisses++;
1235             if (tlb->hasMemSidePort) {
1236                 // there is a TLB below -> propagate down the TLB hierarchy
1237                 tlb->memSidePort[0]->sendFunctional(pkt);
1238                 // If no valid translation from a prefetch, then just return
1239                 if (sender_state->prefetch && !pkt->req->hasPaddr())
1240                     return;
1241             } else {
1242                 // Need to access the page table and update the TLB
1243                 DPRINTF(GPUTLB, "Doing a page walk for address %#x\n",
1244                         virt_page_addr);
1245
1246                 Process *p = tc->getProcessPtr();
1247
1248                 Addr vaddr = pkt->req->getVaddr();
1249     #ifndef NDEBUG
1250                 Addr alignedVaddr = p->pTable->pageAlign(vaddr);
1251                 assert(alignedVaddr == virt_page_addr);
1252     #endif
1253
1254                 const EmulationPageTable::Entry *pte =
1255                         p->pTable->lookup(vaddr);
1256                 if (!pte && sender_state->tlbMode != BaseTLB::Execute &&
1257                         p->fixupFault(vaddr)) {
1258                     pte = p->pTable->lookup(vaddr);
1259                 }
1260
1261                 if (!sender_state->prefetch) {
1262                     // no PageFaults are permitted after
1263                     // the second page table lookup
1264                     assert(pte);
1265
1266                     DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
1267                             pte->paddr);
1268
1269                     sender_state->tlbEntry =
1270                         new TlbEntry(p->pid(), virt_page_addr,
1271                                      pte->paddr, false, false);
1272                 } else {
1273                     // If this was a prefetch, then do the normal thing if it
1274                     // was a successful translation.  Otherwise, send an empty
1275                     // TLB entry back so that it can be figured out as empty
1276                     // and handled accordingly.
1277                     if (pte) {
1278                         DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
1279                                 pte->paddr);
1280
1281                         sender_state->tlbEntry =
1282                             new TlbEntry(p->pid(), virt_page_addr,
1283                                          pte->paddr, false, false);
1284                     } else {
1285                         DPRINTF(GPUPrefetch, "Prefetch failed %#x\n",
1286                                 alignedVaddr);
1287
1288                         sender_state->tlbEntry = nullptr;
1289
1290                         return;
1291                     }
1292                 }
1293             }
1294         } else {
1295             DPRINTF(GPUPrefetch, "Functional Hit for vaddr %#x\n",
1296                     tlb->lookup(pkt->req->getVaddr()));
1297
1298             TlbEntry *entry = tlb->lookup(pkt->req->getVaddr(),
1299                                              update_stats);
1300
1301             assert(entry);
1302
1303             auto p = sender_state->tc->getProcessPtr();
1304             sender_state->tlbEntry =
1305                 new TlbEntry(p->pid(), entry->vaddr, entry->paddr,
1306                              false, false);
1307         }
1308         // This is the function that would populate pkt->req with the paddr of
1309         // the translation. But if no translation happens (i.e Prefetch fails)
1310         // then the early returns in the above code wiill keep this function
1311         // from executing.
1312         tlb->handleFuncTranslationReturn(pkt, tlb_outcome);
1313     }
1314
1315     void
1316     GpuTLB::CpuSidePort::recvReqRetry()
1317     {
1318         // The CPUSidePort never sends anything but replies. No retries
1319         // expected.
1320         panic("recvReqRetry called");
1321     }
1322
1323     AddrRangeList
1324     GpuTLB::CpuSidePort::getAddrRanges() const
1325     {
1326         // currently not checked by the master
1327         AddrRangeList ranges;
1328
1329         return ranges;
1330     }
1331
1332     /**
1333      * MemSidePort receives the packet back.
1334      * We need to call the handleTranslationReturn
1335      * and propagate up the hierarchy.
1336      */
1337     bool
1338     GpuTLB::MemSidePort::recvTimingResp(PacketPtr pkt)
1339     {
1340         Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
1341                                         TheISA::PageBytes);
1342
1343         DPRINTF(GPUTLB, "MemSidePort recvTiming for virt_page_addr %#x\n",
1344                 virt_page_addr);
1345
1346         TLBEvent *tlb_event = tlb->translationReturnEvent[virt_page_addr];
1347         assert(tlb_event);
1348         assert(virt_page_addr == tlb_event->getTLBEventVaddr());
1349
1350         tlb_event->updateOutcome(MISS_RETURN);
1351         tlb->schedule(tlb_event, curTick()+tlb->clockPeriod());
1352
1353         return true;
1354     }
1355
1356     void
1357     GpuTLB::MemSidePort::recvReqRetry()
1358     {
1359         // No retries should reach the TLB. The retries
1360         // should only reach the TLBCoalescer.
1361         panic("recvReqRetry called");
1362     }
1363
1364     void
1365     GpuTLB::cleanup()
1366     {
1367         while (!cleanupQueue.empty()) {
1368             Addr cleanup_addr = cleanupQueue.front();
1369             cleanupQueue.pop();
1370
1371             // delete TLBEvent
1372             TLBEvent * old_tlb_event = translationReturnEvent[cleanup_addr];
1373             delete old_tlb_event;
1374             translationReturnEvent.erase(cleanup_addr);
1375
1376             // update number of outstanding requests
1377             outstandingReqs--;
1378         }
1379
1380         /** the higher level coalescer should retry if it has
1381          * any pending requests.
1382          */
1383         for (int i = 0; i < cpuSidePort.size(); ++i) {
1384             cpuSidePort[i]->sendRetryReq();
1385         }
1386     }
1387
1388     void
1389     GpuTLB::updatePageFootprint(Addr virt_page_addr)
1390     {
1391
1392         std::pair<AccessPatternTable::iterator, bool> ret;
1393
1394         AccessInfo tmp_access_info;
1395         tmp_access_info.lastTimeAccessed = 0;
1396         tmp_access_info.accessesPerPage = 0;
1397         tmp_access_info.totalReuseDistance = 0;
1398         tmp_access_info.sumDistance = 0;
1399         tmp_access_info.meanDistance = 0;
1400
1401         ret = TLBFootprint.insert(
1402             AccessPatternTable::value_type(virt_page_addr, tmp_access_info));
1403
1404         bool first_page_access = ret.second;
1405
1406         if (first_page_access) {
1407             numUniquePages++;
1408         } else  {
1409             int accessed_before;
1410             accessed_before  = curTick() - ret.first->second.lastTimeAccessed;
1411             ret.first->second.totalReuseDistance += accessed_before;
1412         }
1413
1414         ret.first->second.accessesPerPage++;
1415         ret.first->second.lastTimeAccessed = curTick();
1416
1417         if (accessDistance) {
1418             ret.first->second.localTLBAccesses
1419                 .push_back(localNumTLBAccesses.value());
1420         }
1421     }
1422
1423     void
1424     GpuTLB::exitCallback()
1425     {
1426         std::ostream *page_stat_file = nullptr;
1427
1428         if (accessDistance) {
1429
1430             // print per page statistics to a separate file (.csv format)
1431             // simout is the gem5 output directory (default is m5out or the one
1432             // specified with -d
1433             page_stat_file = simout.create(name().c_str())->stream();
1434
1435             // print header
1436             *page_stat_file
1437                 << "page,max_access_distance,mean_access_distance, "
1438                 << "stddev_distance" << std::endl;
1439         }
1440
1441         // update avg. reuse distance footprint
1442         unsigned int sum_avg_reuse_distance_per_page = 0;
1443
1444         // iterate through all pages seen by this TLB
1445         for (auto &iter : TLBFootprint) {
1446             sum_avg_reuse_distance_per_page += iter.second.totalReuseDistance /
1447                                                iter.second.accessesPerPage;
1448
1449             if (accessDistance) {
1450                 unsigned int tmp = iter.second.localTLBAccesses[0];
1451                 unsigned int prev = tmp;
1452
1453                 for (int i = 0; i < iter.second.localTLBAccesses.size(); ++i) {
1454                     if (i) {
1455                         tmp = prev + 1;
1456                     }
1457
1458                     prev = iter.second.localTLBAccesses[i];
1459                     // update the localTLBAccesses value
1460                     // with the actual differece
1461                     iter.second.localTLBAccesses[i] -= tmp;
1462                     // compute the sum of AccessDistance per page
1463                     // used later for mean
1464                     iter.second.sumDistance +=
1465                         iter.second.localTLBAccesses[i];
1466                 }
1467
1468                 iter.second.meanDistance =
1469                     iter.second.sumDistance / iter.second.accessesPerPage;
1470
1471                 // compute std_dev and max  (we need a second round because we
1472                 // need to know the mean value
1473                 unsigned int max_distance = 0;
1474                 unsigned int stddev_distance = 0;
1475
1476                 for (int i = 0; i < iter.second.localTLBAccesses.size(); ++i) {
1477                     unsigned int tmp_access_distance =
1478                         iter.second.localTLBAccesses[i];
1479
1480                     if (tmp_access_distance > max_distance) {
1481                         max_distance = tmp_access_distance;
1482                     }
1483
1484                     unsigned int diff =
1485                         tmp_access_distance - iter.second.meanDistance;
1486                     stddev_distance += pow(diff, 2);
1487
1488                 }
1489
1490                 stddev_distance =
1491                     sqrt(stddev_distance/iter.second.accessesPerPage);
1492
1493                 if (page_stat_file) {
1494                     *page_stat_file << std::hex << iter.first << ",";
1495                     *page_stat_file << std::dec << max_distance << ",";
1496                     *page_stat_file << std::dec << iter.second.meanDistance
1497                                     << ",";
1498                     *page_stat_file << std::dec << stddev_distance;
1499                     *page_stat_file << std::endl;
1500                 }
1501
1502                 // erase the localTLBAccesses array
1503                 iter.second.localTLBAccesses.clear();
1504             }
1505         }
1506
1507         if (!TLBFootprint.empty()) {
1508             avgReuseDistance =
1509                 sum_avg_reuse_distance_per_page / TLBFootprint.size();
1510         }
1511
1512         //clear the TLBFootprint map
1513         TLBFootprint.clear();
1514     }
1515 } // namespace X86ISA
1516
1517 X86ISA::GpuTLB*
1518 X86GPUTLBParams::create()
1519 {
1520     return new X86ISA::GpuTLB(this);
1521 }
1522