2 * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
5 * For use for simulation and test purposes only
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
10 * 1. Redistributions of source code must retain the above copyright notice,
11 * this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright notice,
14 * this list of conditions and the following disclaimer in the documentation
15 * and/or other materials provided with the distribution.
17 * 3. Neither the name of the copyright holder nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
36 #include "gpu-compute/gpu_tlb.hh"
41 #include "arch/x86/faults.hh"
42 #include "arch/x86/insts/microldstop.hh"
43 #include "arch/x86/pagetable.hh"
44 #include "arch/x86/pagetable_walker.hh"
45 #include "arch/x86/regs/misc.hh"
46 #include "arch/x86/regs/msr.hh"
47 #include "arch/x86/x86_traits.hh"
48 #include "base/bitfield.hh"
49 #include "base/logging.hh"
50 #include "base/output.hh"
51 #include "base/trace.hh"
52 #include "cpu/base.hh"
53 #include "cpu/thread_context.hh"
54 #include "debug/GPUPrefetch.hh"
55 #include "debug/GPUTLB.hh"
56 #include "mem/packet_access.hh"
57 #include "mem/page_table.hh"
58 #include "mem/request.hh"
59 #include "sim/process.hh"
60 #include "sim/pseudo_inst.hh"
65 GpuTLB::GpuTLB(const Params
*p
)
66 : ClockedObject(p
), configAddress(0), size(p
->size
),
67 cleanupEvent([this]{ cleanup(); }, name(), false,
69 exitEvent([this]{ exitCallback(); }, name())
72 assert(assoc
<= size
);
74 allocationPolicy
= p
->allocationPolicy
;
75 hasMemSidePort
= false;
76 accessDistance
= p
->accessDistance
;
77 clock
= p
->clk_domain
->clockPeriod();
79 tlb
.assign(size
, TlbEntry());
81 freeList
.resize(numSets
);
82 entryList
.resize(numSets
);
84 for (int set
= 0; set
< numSets
; ++set
) {
85 for (int way
= 0; way
< assoc
; ++way
) {
86 int x
= set
* assoc
+ way
;
87 freeList
[set
].push_back(&tlb
.at(x
));
94 * @warning: the set-associative version assumes you have a
95 * fixed page size of 4KB.
96 * If the page size is greather than 4KB (as defined in the
97 * TheISA::PageBytes), then there are various issues w/ the current
98 * implementation (you'd have the same 8KB page being replicated in
101 setMask
= numSets
- 1;
103 maxCoalescedReqs
= p
->maxOutstandingReqs
;
105 // Do not allow maxCoalescedReqs to be more than the TLB associativity
106 if (maxCoalescedReqs
> assoc
) {
107 maxCoalescedReqs
= assoc
;
108 cprintf("Forcing maxCoalescedReqs to %d (TLB assoc.) \n", assoc
);
112 hitLatency
= p
->hitLatency
;
113 missLatency1
= p
->missLatency1
;
114 missLatency2
= p
->missLatency2
;
116 // create the slave ports based on the number of connected ports
117 for (size_t i
= 0; i
< p
->port_slave_connection_count
; ++i
) {
118 cpuSidePort
.push_back(new CpuSidePort(csprintf("%s-port%d",
119 name(), i
), this, i
));
122 // create the master ports based on the number of connected ports
123 for (size_t i
= 0; i
< p
->port_master_connection_count
; ++i
) {
124 memSidePort
.push_back(new MemSidePort(csprintf("%s-port%d",
125 name(), i
), this, i
));
129 // fixme: this is never called?
132 // make sure all the hash-maps are empty
133 assert(translationReturnEvent
.empty());
137 GpuTLB::getPort(const std::string
&if_name
, PortID idx
)
139 if (if_name
== "slave") {
140 if (idx
>= static_cast<PortID
>(cpuSidePort
.size())) {
141 panic("TLBCoalescer::getPort: unknown index %d\n", idx
);
144 return *cpuSidePort
[idx
];
145 } else if (if_name
== "master") {
146 if (idx
>= static_cast<PortID
>(memSidePort
.size())) {
147 panic("TLBCoalescer::getPort: unknown index %d\n", idx
);
150 hasMemSidePort
= true;
152 return *memSidePort
[idx
];
154 panic("TLBCoalescer::getPort: unknown port %s\n", if_name
);
159 GpuTLB::insert(Addr vpn
, TlbEntry
&entry
)
161 TlbEntry
*newEntry
= nullptr;
164 * vpn holds the virtual page address
165 * The least significant bits are simply masked
167 int set
= (vpn
>> TheISA::PageShift
) & setMask
;
169 if (!freeList
[set
].empty()) {
170 newEntry
= freeList
[set
].front();
171 freeList
[set
].pop_front();
173 newEntry
= entryList
[set
].back();
174 entryList
[set
].pop_back();
178 newEntry
->vaddr
= vpn
;
179 entryList
[set
].push_front(newEntry
);
184 GpuTLB::EntryList::iterator
185 GpuTLB::lookupIt(Addr va
, bool update_lru
)
187 int set
= (va
>> TheISA::PageShift
) & setMask
;
193 auto entry
= entryList
[set
].begin();
194 for (; entry
!= entryList
[set
].end(); ++entry
) {
195 int page_size
= (*entry
)->size();
197 if ((*entry
)->vaddr
<= va
&& (*entry
)->vaddr
+ page_size
> va
) {
198 DPRINTF(GPUTLB
, "Matched vaddr %#x to entry starting at %#x "
199 "with size %#x.\n", va
, (*entry
)->vaddr
, page_size
);
202 entryList
[set
].push_front(*entry
);
203 entryList
[set
].erase(entry
);
204 entry
= entryList
[set
].begin();
215 GpuTLB::lookup(Addr va
, bool update_lru
)
217 int set
= (va
>> TheISA::PageShift
) & setMask
;
219 auto entry
= lookupIt(va
, update_lru
);
221 if (entry
== entryList
[set
].end())
228 GpuTLB::invalidateAll()
230 DPRINTF(GPUTLB
, "Invalidating all entries.\n");
232 for (int i
= 0; i
< numSets
; ++i
) {
233 while (!entryList
[i
].empty()) {
234 TlbEntry
*entry
= entryList
[i
].front();
235 entryList
[i
].pop_front();
236 freeList
[i
].push_back(entry
);
242 GpuTLB::setConfigAddress(uint32_t addr
)
244 configAddress
= addr
;
248 GpuTLB::invalidateNonGlobal()
250 DPRINTF(GPUTLB
, "Invalidating all non global entries.\n");
252 for (int i
= 0; i
< numSets
; ++i
) {
253 for (auto entryIt
= entryList
[i
].begin();
254 entryIt
!= entryList
[i
].end();) {
255 if (!(*entryIt
)->global
) {
256 freeList
[i
].push_back(*entryIt
);
257 entryList
[i
].erase(entryIt
++);
266 GpuTLB::demapPage(Addr va
, uint64_t asn
)
269 int set
= (va
>> TheISA::PageShift
) & setMask
;
270 auto entry
= lookupIt(va
, false);
272 if (entry
!= entryList
[set
].end()) {
273 freeList
[set
].push_back(*entry
);
274 entryList
[set
].erase(entry
);
284 localMiscRegAccess(bool read
, MiscRegIndex regNum
,
285 ThreadContext
*tc
, PacketPtr pkt
)
288 RegVal data
= htole(tc
->readMiscReg(regNum
));
289 // Make sure we don't trot off the end of data.
290 pkt
->setData((uint8_t *)&data
);
292 RegVal data
= htole(tc
->readMiscRegNoEffect(regNum
));
293 tc
->setMiscReg(regNum
, letoh(data
));
298 } // anonymous namespace
301 GpuTLB::translateInt(bool read
, const RequestPtr
&req
, ThreadContext
*tc
)
303 DPRINTF(GPUTLB
, "Addresses references internal memory.\n");
304 Addr vaddr
= req
->getVaddr();
305 Addr prefix
= (vaddr
>> 3) & IntAddrPrefixMask
;
307 if (prefix
== IntAddrPrefixCPUID
) {
308 panic("CPUID memory space not yet implemented!\n");
309 } else if (prefix
== IntAddrPrefixMSR
) {
310 vaddr
= (vaddr
>> 3) & ~IntAddrPrefixMask
;
313 if (!msrAddrToIndex(regNum
, vaddr
))
314 return std::make_shared
<GeneralProtection
>(0);
316 req
->setLocalAccessor(
317 [read
,regNum
,vaddr
](ThreadContext
*tc
, PacketPtr pkt
)
319 return localMiscRegAccess(read
, regNum
, tc
, pkt
);
324 } else if (prefix
== IntAddrPrefixIO
) {
325 // TODO If CPL > IOPL or in virtual mode, check the I/O permission
326 // bitmap in the TSS.
328 Addr IOPort
= vaddr
& ~IntAddrPrefixMask
;
329 // Make sure the address fits in the expected 16 bit IO address
331 assert(!(IOPort
& ~0xFFFF));
332 if (IOPort
== 0xCF8 && req
->getSize() == 4) {
333 req
->setLocalAccessor(
334 [read
](ThreadContext
*tc
, PacketPtr pkt
)
336 return localMiscRegAccess(
337 read
, MISCREG_PCI_CONFIG_ADDRESS
, tc
, pkt
);
340 } else if ((IOPort
& ~mask(2)) == 0xCFC) {
341 req
->setFlags(Request::UNCACHEABLE
| Request::STRICT_ORDER
);
343 tc
->readMiscRegNoEffect(MISCREG_PCI_CONFIG_ADDRESS
);
344 if (bits(configAddress
, 31, 31)) {
345 req
->setPaddr(PhysAddrPrefixPciConfig
|
346 mbits(configAddress
, 30, 2) |
349 req
->setPaddr(PhysAddrPrefixIO
| IOPort
);
352 req
->setFlags(Request::UNCACHEABLE
| Request::STRICT_ORDER
);
353 req
->setPaddr(PhysAddrPrefixIO
| IOPort
);
357 panic("Access to unrecognized internal address space %#x.\n",
363 * TLB_lookup will only perform a TLB lookup returning true on a TLB hit
364 * and false on a TLB miss.
365 * Many of the checks about different modes have been converted to
366 * assertions, since these parts of the code are not really used.
367 * On a hit it will update the LRU stack.
370 GpuTLB::tlbLookup(const RequestPtr
&req
,
371 ThreadContext
*tc
, bool update_stats
)
373 bool tlb_hit
= false;
375 uint32_t flags
= req
->getFlags();
376 int seg
= flags
& SegmentFlagMask
;
379 assert(seg
!= SEGMENT_REG_MS
);
380 Addr vaddr
= req
->getVaddr();
381 DPRINTF(GPUTLB
, "TLB Lookup for vaddr %#x.\n", vaddr
);
382 HandyM5Reg m5Reg
= tc
->readMiscRegNoEffect(MISCREG_M5_REG
);
385 DPRINTF(GPUTLB
, "In protected mode.\n");
386 // make sure we are in 64-bit mode
387 assert(m5Reg
.mode
== LongMode
);
389 // If paging is enabled, do the translation.
391 DPRINTF(GPUTLB
, "Paging enabled.\n");
392 //update LRU stack on a hit
393 TlbEntry
*entry
= lookup(vaddr
, true);
399 // functional tlb access for memory initialization
400 // i.e., memory seeding or instr. seeding -> don't update
405 localNumTLBAccesses
++;
419 GpuTLB::translate(const RequestPtr
&req
, ThreadContext
*tc
,
420 Translation
*translation
, Mode mode
,
421 bool &delayedResponse
, bool timing
, int &latency
)
423 uint32_t flags
= req
->getFlags();
424 int seg
= flags
& SegmentFlagMask
;
425 bool storeCheck
= flags
& (StoreCheck
<< FlagShift
);
427 // If this is true, we're dealing with a request
428 // to a non-memory address space.
429 if (seg
== SEGMENT_REG_MS
) {
430 return translateInt(mode
== Mode::Read
, req
, tc
);
433 delayedResponse
= false;
434 Addr vaddr
= req
->getVaddr();
435 DPRINTF(GPUTLB
, "Translating vaddr %#x.\n", vaddr
);
437 HandyM5Reg m5Reg
= tc
->readMiscRegNoEffect(MISCREG_M5_REG
);
439 // If protected mode has been enabled...
441 DPRINTF(GPUTLB
, "In protected mode.\n");
442 // If we're not in 64-bit mode, do protection/limit checks
443 if (m5Reg
.mode
!= LongMode
) {
444 DPRINTF(GPUTLB
, "Not in long mode. Checking segment "
447 // Check for a null segment selector.
448 if (!(seg
== SEGMENT_REG_TSG
|| seg
== SYS_SEGMENT_REG_IDTR
||
449 seg
== SEGMENT_REG_HS
|| seg
== SEGMENT_REG_LS
)
450 && !tc
->readMiscRegNoEffect(MISCREG_SEG_SEL(seg
))) {
451 return std::make_shared
<GeneralProtection
>(0);
454 bool expandDown
= false;
455 SegAttr attr
= tc
->readMiscRegNoEffect(MISCREG_SEG_ATTR(seg
));
457 if (seg
>= SEGMENT_REG_ES
&& seg
<= SEGMENT_REG_HS
) {
458 if (!attr
.writable
&& (mode
== BaseTLB::Write
||
460 return std::make_shared
<GeneralProtection
>(0);
462 if (!attr
.readable
&& mode
== BaseTLB::Read
)
463 return std::make_shared
<GeneralProtection
>(0);
465 expandDown
= attr
.expandDown
;
469 Addr base
= tc
->readMiscRegNoEffect(MISCREG_SEG_BASE(seg
));
470 Addr limit
= tc
->readMiscRegNoEffect(MISCREG_SEG_LIMIT(seg
));
471 // This assumes we're not in 64 bit mode. If we were, the
472 // default address size is 64 bits, overridable to 32.
474 bool sizeOverride
= (flags
& (AddrSizeFlagBit
<< FlagShift
));
475 SegAttr csAttr
= tc
->readMiscRegNoEffect(MISCREG_CS_ATTR
);
477 if ((csAttr
.defaultSize
&& sizeOverride
) ||
478 (!csAttr
.defaultSize
&& !sizeOverride
)) {
482 Addr offset
= bits(vaddr
- base
, size
- 1, 0);
483 Addr endOffset
= offset
+ req
->getSize() - 1;
486 DPRINTF(GPUTLB
, "Checking an expand down segment.\n");
487 warn_once("Expand down segments are untested.\n");
489 if (offset
<= limit
|| endOffset
<= limit
)
490 return std::make_shared
<GeneralProtection
>(0);
492 if (offset
> limit
|| endOffset
> limit
)
493 return std::make_shared
<GeneralProtection
>(0);
497 // If paging is enabled, do the translation.
499 DPRINTF(GPUTLB
, "Paging enabled.\n");
500 // The vaddr already has the segment base applied.
501 TlbEntry
*entry
= lookup(vaddr
);
502 localNumTLBAccesses
++;
507 latency
= missLatency1
;
511 fatal("GpuTLB doesn't support full-system mode\n");
513 DPRINTF(GPUTLB
, "Handling a TLB miss for address %#x "
514 "at pc %#x.\n", vaddr
, tc
->instAddr());
516 Process
*p
= tc
->getProcessPtr();
517 const EmulationPageTable::Entry
*pte
=
518 p
->pTable
->lookup(vaddr
);
520 if (!pte
&& mode
!= BaseTLB::Execute
) {
521 // penalize a "page fault" more
523 latency
+= missLatency2
;
525 if (p
->fixupFault(vaddr
))
526 pte
= p
->pTable
->lookup(vaddr
);
530 return std::make_shared
<PageFault
>(vaddr
, true,
534 Addr alignedVaddr
= p
->pTable
->pageAlign(vaddr
);
536 DPRINTF(GPUTLB
, "Mapping %#x to %#x\n",
537 alignedVaddr
, pte
->paddr
);
539 TlbEntry
gpuEntry(p
->pid(), alignedVaddr
,
540 pte
->paddr
, false, false);
541 entry
= insert(alignedVaddr
, gpuEntry
);
544 DPRINTF(GPUTLB
, "Miss was serviced.\n");
550 latency
= hitLatency
;
554 // Do paging protection checks.
555 bool inUser
= (m5Reg
.cpl
== 3 &&
556 !(flags
& (CPL0FlagBit
<< FlagShift
)));
558 CR0 cr0
= tc
->readMiscRegNoEffect(MISCREG_CR0
);
559 bool badWrite
= (!entry
->writable
&& (inUser
|| cr0
.wp
));
561 if ((inUser
&& !entry
->user
) || (mode
== BaseTLB::Write
&&
563 // The page must have been present to get into the TLB in
564 // the first place. We'll assume the reserved bits are
565 // fine even though we're not checking them.
566 return std::make_shared
<PageFault
>(vaddr
, true, mode
,
570 if (storeCheck
&& badWrite
) {
571 // This would fault if this were a write, so return a page
572 // fault that reflects that happening.
573 return std::make_shared
<PageFault
>(vaddr
, true,
579 DPRINTF(GPUTLB
, "Entry found with paddr %#x, doing protection "
580 "checks.\n", entry
->paddr
);
582 int page_size
= entry
->size();
583 Addr paddr
= entry
->paddr
| (vaddr
& (page_size
- 1));
584 DPRINTF(GPUTLB
, "Translated %#x -> %#x.\n", vaddr
, paddr
);
585 req
->setPaddr(paddr
);
587 if (entry
->uncacheable
)
588 req
->setFlags(Request::UNCACHEABLE
);
590 //Use the address which already has segmentation applied.
591 DPRINTF(GPUTLB
, "Paging disabled.\n");
592 DPRINTF(GPUTLB
, "Translated %#x -> %#x.\n", vaddr
, vaddr
);
593 req
->setPaddr(vaddr
);
597 DPRINTF(GPUTLB
, "In real mode.\n");
598 DPRINTF(GPUTLB
, "Translated %#x -> %#x.\n", vaddr
, vaddr
);
599 req
->setPaddr(vaddr
);
602 // Check for an access to the local APIC
604 LocalApicBase localApicBase
=
605 tc
->readMiscRegNoEffect(MISCREG_APIC_BASE
);
607 Addr baseAddr
= localApicBase
.base
* PageBytes
;
608 Addr paddr
= req
->getPaddr();
610 if (baseAddr
<= paddr
&& baseAddr
+ PageBytes
> paddr
) {
611 // Force the access to be uncacheable.
612 req
->setFlags(Request::UNCACHEABLE
);
613 req
->setPaddr(x86LocalAPICAddress(tc
->contextId(),
622 GpuTLB::translateAtomic(const RequestPtr
&req
, ThreadContext
*tc
,
623 Mode mode
, int &latency
)
625 bool delayedResponse
;
627 return GpuTLB::translate(req
, tc
, nullptr, mode
, delayedResponse
, false,
632 GpuTLB::translateTiming(const RequestPtr
&req
, ThreadContext
*tc
,
633 Translation
*translation
, Mode mode
, int &latency
)
635 bool delayedResponse
;
638 Fault fault
= GpuTLB::translate(req
, tc
, translation
, mode
,
639 delayedResponse
, true, latency
);
641 if (!delayedResponse
)
642 translation
->finish(fault
, req
, tc
, mode
);
653 GpuTLB::serialize(CheckpointOut
&cp
) const
658 GpuTLB::unserialize(CheckpointIn
&cp
)
665 ClockedObject::regStats();
668 .name(name() + ".local_TLB_accesses")
669 .desc("Number of TLB accesses")
673 .name(name() + ".local_TLB_hits")
674 .desc("Number of TLB hits")
678 .name(name() + ".local_TLB_misses")
679 .desc("Number of TLB misses")
683 .name(name() + ".local_TLB_miss_rate")
684 .desc("TLB miss rate")
688 .name(name() + ".access_cycles")
689 .desc("Cycles spent accessing this TLB level")
693 .name(name() + ".page_table_cycles")
694 .desc("Cycles spent accessing the page table")
697 localTLBMissRate
= 100 * localNumTLBMisses
/ localNumTLBAccesses
;
700 .name(name() + ".unique_pages")
701 .desc("Number of unique pages touched")
705 .name(name() + ".local_cycles")
706 .desc("Number of cycles spent in queue for all incoming reqs")
710 .name(name() + ".local_latency")
711 .desc("Avg. latency over incoming coalesced reqs")
714 localLatency
= localCycles
/ localNumTLBAccesses
;
717 .name(name() + ".global_TLB_accesses")
718 .desc("Number of TLB accesses")
722 .name(name() + ".global_TLB_hits")
723 .desc("Number of TLB hits")
727 .name(name() + ".global_TLB_misses")
728 .desc("Number of TLB misses")
732 .name(name() + ".global_TLB_miss_rate")
733 .desc("TLB miss rate")
736 globalTLBMissRate
= 100 * globalNumTLBMisses
/ globalNumTLBAccesses
;
739 .name(name() + ".avg_reuse_distance")
740 .desc("avg. reuse distance over all pages (in ticks)")
746 * Do the TLB lookup for this coalesced request and schedule
747 * another event <TLB access latency> cycles later.
751 GpuTLB::issueTLBLookup(PacketPtr pkt
)
754 assert(pkt
->senderState
);
756 Addr virt_page_addr
= roundDown(pkt
->req
->getVaddr(),
759 TranslationState
*sender_state
=
760 safe_cast
<TranslationState
*>(pkt
->senderState
);
762 bool update_stats
= !sender_state
->prefetch
;
763 ThreadContext
* tmp_tc
= sender_state
->tc
;
765 DPRINTF(GPUTLB
, "Translation req. for virt. page addr %#x\n",
768 int req_cnt
= sender_state
->reqCnt
.back();
771 accessCycles
-= (curTick() * req_cnt
);
772 localCycles
-= curTick();
773 updatePageFootprint(virt_page_addr
);
774 globalNumTLBAccesses
+= req_cnt
;
777 tlbOutcome lookup_outcome
= TLB_MISS
;
778 const RequestPtr
&tmp_req
= pkt
->req
;
780 // Access the TLB and figure out if it's a hit or a miss.
781 bool success
= tlbLookup(tmp_req
, tmp_tc
, update_stats
);
784 lookup_outcome
= TLB_HIT
;
785 // Put the entry in SenderState
786 TlbEntry
*entry
= lookup(tmp_req
->getVaddr(), false);
789 auto p
= sender_state
->tc
->getProcessPtr();
790 sender_state
->tlbEntry
=
791 new TlbEntry(p
->pid(), entry
->vaddr
, entry
->paddr
,
795 // the reqCnt has an entry per level, so its size tells us
796 // which level we are in
797 sender_state
->hitLevel
= sender_state
->reqCnt
.size();
798 globalNumTLBHits
+= req_cnt
;
802 globalNumTLBMisses
+= req_cnt
;
806 * We now know the TLB lookup outcome (if it's a hit or a miss), as well
807 * as the TLB access latency.
809 * We create and schedule a new TLBEvent which will help us take the
810 * appropriate actions (e.g., update TLB on a hit, send request to lower
811 * level TLB on a miss, or start a page walk if this was the last-level
814 TLBEvent
*tlb_event
=
815 new TLBEvent(this, virt_page_addr
, lookup_outcome
, pkt
);
817 if (translationReturnEvent
.count(virt_page_addr
)) {
818 panic("Virtual Page Address %#x already has a return event\n",
822 translationReturnEvent
[virt_page_addr
] = tlb_event
;
825 DPRINTF(GPUTLB
, "schedule translationReturnEvent @ curTick %d\n",
826 curTick() + this->ticks(hitLatency
));
828 schedule(tlb_event
, curTick() + this->ticks(hitLatency
));
831 GpuTLB::TLBEvent::TLBEvent(GpuTLB
* _tlb
, Addr _addr
, tlbOutcome tlb_outcome
,
833 : Event(CPU_Tick_Pri
), tlb(_tlb
), virtPageAddr(_addr
),
834 outcome(tlb_outcome
), pkt(_pkt
)
839 * Do Paging protection checks. If we encounter a page fault, then
840 * an assertion is fired.
843 GpuTLB::pagingProtectionChecks(ThreadContext
*tc
, PacketPtr pkt
,
844 TlbEntry
* tlb_entry
, Mode mode
)
846 HandyM5Reg m5Reg
= tc
->readMiscRegNoEffect(MISCREG_M5_REG
);
847 uint32_t flags
= pkt
->req
->getFlags();
848 bool storeCheck
= flags
& (StoreCheck
<< FlagShift
);
850 // Do paging protection checks.
851 bool inUser
= (m5Reg
.cpl
== 3 && !(flags
& (CPL0FlagBit
<< FlagShift
)));
852 CR0 cr0
= tc
->readMiscRegNoEffect(MISCREG_CR0
);
854 bool badWrite
= (!tlb_entry
->writable
&& (inUser
|| cr0
.wp
));
856 if ((inUser
&& !tlb_entry
->user
) ||
857 (mode
== BaseTLB::Write
&& badWrite
)) {
858 // The page must have been present to get into the TLB in
859 // the first place. We'll assume the reserved bits are
860 // fine even though we're not checking them.
861 panic("Page fault detected");
864 if (storeCheck
&& badWrite
) {
865 // This would fault if this were a write, so return a page
866 // fault that reflects that happening.
867 panic("Page fault detected");
872 * handleTranslationReturn is called on a TLB hit,
873 * when a TLB miss returns or when a page fault returns.
874 * The latter calls handelHit with TLB miss as tlbOutcome.
877 GpuTLB::handleTranslationReturn(Addr virt_page_addr
, tlbOutcome tlb_outcome
,
882 Addr vaddr
= pkt
->req
->getVaddr();
884 TranslationState
*sender_state
=
885 safe_cast
<TranslationState
*>(pkt
->senderState
);
887 ThreadContext
*tc
= sender_state
->tc
;
888 Mode mode
= sender_state
->tlbMode
;
890 TlbEntry
*local_entry
, *new_entry
;
892 if (tlb_outcome
== TLB_HIT
) {
893 DPRINTF(GPUTLB
, "Translation Done - TLB Hit for addr %#x\n", vaddr
);
894 local_entry
= sender_state
->tlbEntry
;
896 DPRINTF(GPUTLB
, "Translation Done - TLB Miss for addr %#x\n",
899 // We are returning either from a page walk or from a hit at a lower
900 // TLB level. The senderState should be "carrying" a pointer to the
902 new_entry
= sender_state
->tlbEntry
;
904 local_entry
= new_entry
;
906 if (allocationPolicy
) {
907 DPRINTF(GPUTLB
, "allocating entry w/ addr %#x\n",
910 local_entry
= insert(virt_page_addr
, *new_entry
);
917 * At this point the packet carries an up-to-date tlbEntry pointer
918 * in its senderState.
919 * Next step is to do the paging protection checks.
921 DPRINTF(GPUTLB
, "Entry found with vaddr %#x, doing protection checks "
922 "while paddr was %#x.\n", local_entry
->vaddr
,
925 pagingProtectionChecks(tc
, pkt
, local_entry
, mode
);
926 int page_size
= local_entry
->size();
927 Addr paddr
= local_entry
->paddr
| (vaddr
& (page_size
- 1));
928 DPRINTF(GPUTLB
, "Translated %#x -> %#x.\n", vaddr
, paddr
);
930 // Since this packet will be sent through the cpu side slave port,
931 // it must be converted to a response pkt if it is not one already
932 if (pkt
->isRequest()) {
933 pkt
->makeTimingResponse();
936 pkt
->req
->setPaddr(paddr
);
938 if (local_entry
->uncacheable
) {
939 pkt
->req
->setFlags(Request::UNCACHEABLE
);
942 //send packet back to coalescer
943 cpuSidePort
[0]->sendTimingResp(pkt
);
944 //schedule cleanup event
945 cleanupQueue
.push(virt_page_addr
);
947 // schedule this only once per cycle.
948 // The check is required because we might have multiple translations
949 // returning the same cycle
950 // this is a maximum priority event and must be on the same cycle
951 // as the cleanup event in TLBCoalescer to avoid a race with
952 // IssueProbeEvent caused by TLBCoalescer::MemSidePort::recvReqRetry
953 if (!cleanupEvent
.scheduled())
954 schedule(cleanupEvent
, curTick());
958 * Here we take the appropriate actions based on the result of the
962 GpuTLB::translationReturn(Addr virtPageAddr
, tlbOutcome outcome
,
965 DPRINTF(GPUTLB
, "Triggered TLBEvent for addr %#x\n", virtPageAddr
);
967 assert(translationReturnEvent
[virtPageAddr
]);
970 TranslationState
*tmp_sender_state
=
971 safe_cast
<TranslationState
*>(pkt
->senderState
);
973 int req_cnt
= tmp_sender_state
->reqCnt
.back();
974 bool update_stats
= !tmp_sender_state
->prefetch
;
977 if (outcome
== TLB_HIT
) {
978 handleTranslationReturn(virtPageAddr
, TLB_HIT
, pkt
);
981 accessCycles
+= (req_cnt
* curTick());
982 localCycles
+= curTick();
985 } else if (outcome
== TLB_MISS
) {
987 DPRINTF(GPUTLB
, "This is a TLB miss\n");
989 accessCycles
+= (req_cnt
*curTick());
990 localCycles
+= curTick();
993 if (hasMemSidePort
) {
994 // the one cyle added here represent the delay from when we get
995 // the reply back till when we propagate it to the coalescer
998 accessCycles
+= (req_cnt
* 1);
1003 * There is a TLB below. Send the coalesced request.
1004 * We actually send the very first packet of all the
1005 * pending packets for this virtual page address.
1007 if (!memSidePort
[0]->sendTimingReq(pkt
)) {
1008 DPRINTF(GPUTLB
, "Failed sending translation request to "
1009 "lower level TLB for addr %#x\n", virtPageAddr
);
1011 memSidePort
[0]->retries
.push_back(pkt
);
1013 DPRINTF(GPUTLB
, "Sent translation request to lower level "
1014 "TLB for addr %#x\n", virtPageAddr
);
1017 //this is the last level TLB. Start a page walk
1018 DPRINTF(GPUTLB
, "Last level TLB - start a page walk for "
1019 "addr %#x\n", virtPageAddr
);
1022 pageTableCycles
-= (req_cnt
*curTick());
1024 TLBEvent
*tlb_event
= translationReturnEvent
[virtPageAddr
];
1026 tlb_event
->updateOutcome(PAGE_WALK
);
1027 schedule(tlb_event
, curTick() + ticks(missLatency2
));
1029 } else if (outcome
== PAGE_WALK
) {
1031 pageTableCycles
+= (req_cnt
*curTick());
1033 // Need to access the page table and update the TLB
1034 DPRINTF(GPUTLB
, "Doing a page walk for address %#x\n",
1037 TranslationState
*sender_state
=
1038 safe_cast
<TranslationState
*>(pkt
->senderState
);
1040 Process
*p
= sender_state
->tc
->getProcessPtr();
1041 Addr vaddr
= pkt
->req
->getVaddr();
1043 Addr alignedVaddr
= p
->pTable
->pageAlign(vaddr
);
1044 assert(alignedVaddr
== virtPageAddr
);
1046 const EmulationPageTable::Entry
*pte
= p
->pTable
->lookup(vaddr
);
1047 if (!pte
&& sender_state
->tlbMode
!= BaseTLB::Execute
&&
1048 p
->fixupFault(vaddr
)) {
1049 pte
= p
->pTable
->lookup(vaddr
);
1053 DPRINTF(GPUTLB
, "Mapping %#x to %#x\n", alignedVaddr
,
1056 sender_state
->tlbEntry
=
1057 new TlbEntry(p
->pid(), virtPageAddr
, pte
->paddr
, false,
1060 sender_state
->tlbEntry
= nullptr;
1063 handleTranslationReturn(virtPageAddr
, TLB_MISS
, pkt
);
1064 } else if (outcome
== MISS_RETURN
) {
1065 /** we add an extra cycle in the return path of the translation
1066 * requests in between the various TLB levels.
1068 handleTranslationReturn(virtPageAddr
, TLB_MISS
, pkt
);
1070 panic("Unexpected TLB outcome %d", outcome
);
1075 GpuTLB::TLBEvent::process()
1077 tlb
->translationReturn(virtPageAddr
, outcome
, pkt
);
1081 GpuTLB::TLBEvent::description() const
1083 return "trigger translationDoneEvent";
1087 GpuTLB::TLBEvent::updateOutcome(tlbOutcome _outcome
)
1093 GpuTLB::TLBEvent::getTLBEventVaddr()
1095 return virtPageAddr
;
1099 * recvTiming receives a coalesced timing request from a TLBCoalescer
1100 * and it calls issueTLBLookup()
1101 * It only rejects the packet if we have exceeded the max
1102 * outstanding number of requests for the TLB
1105 GpuTLB::CpuSidePort::recvTimingReq(PacketPtr pkt
)
1107 if (tlb
->outstandingReqs
< tlb
->maxCoalescedReqs
) {
1108 tlb
->issueTLBLookup(pkt
);
1109 // update number of outstanding translation requests
1110 tlb
->outstandingReqs
++;
1113 DPRINTF(GPUTLB
, "Reached maxCoalescedReqs number %d\n",
1114 tlb
->outstandingReqs
);
1120 * handleFuncTranslationReturn is called on a TLB hit,
1121 * when a TLB miss returns or when a page fault returns.
1122 * It updates LRU, inserts the TLB entry on a miss
1123 * depending on the allocation policy and does the required
1124 * protection checks. It does NOT create a new packet to
1125 * update the packet's addr; this is done in hsail-gpu code.
1128 GpuTLB::handleFuncTranslationReturn(PacketPtr pkt
, tlbOutcome tlb_outcome
)
1130 TranslationState
*sender_state
=
1131 safe_cast
<TranslationState
*>(pkt
->senderState
);
1133 ThreadContext
*tc
= sender_state
->tc
;
1134 Mode mode
= sender_state
->tlbMode
;
1135 Addr vaddr
= pkt
->req
->getVaddr();
1137 TlbEntry
*local_entry
, *new_entry
;
1139 if (tlb_outcome
== TLB_HIT
) {
1140 DPRINTF(GPUTLB
, "Functional Translation Done - TLB hit for addr "
1143 local_entry
= sender_state
->tlbEntry
;
1145 DPRINTF(GPUTLB
, "Functional Translation Done - TLB miss for addr "
1148 // We are returning either from a page walk or from a hit at a lower
1149 // TLB level. The senderState should be "carrying" a pointer to the
1150 // correct TLBEntry.
1151 new_entry
= sender_state
->tlbEntry
;
1153 local_entry
= new_entry
;
1155 if (allocationPolicy
) {
1156 Addr virt_page_addr
= roundDown(vaddr
, TheISA::PageBytes
);
1158 DPRINTF(GPUTLB
, "allocating entry w/ addr %#x\n",
1161 local_entry
= insert(virt_page_addr
, *new_entry
);
1164 assert(local_entry
);
1167 DPRINTF(GPUTLB
, "Entry found with vaddr %#x, doing protection checks "
1168 "while paddr was %#x.\n", local_entry
->vaddr
,
1169 local_entry
->paddr
);
1172 * Do paging checks if it's a normal functional access. If it's for a
1173 * prefetch, then sometimes you can try to prefetch something that
1174 * won't pass protection. We don't actually want to fault becuase there
1175 * is no demand access to deem this a violation. Just put it in the
1176 * TLB and it will fault if indeed a future demand access touches it in
1179 * This feature could be used to explore security issues around
1180 * speculative memory accesses.
1182 if (!sender_state
->prefetch
&& sender_state
->tlbEntry
)
1183 pagingProtectionChecks(tc
, pkt
, local_entry
, mode
);
1185 int page_size
= local_entry
->size();
1186 Addr paddr
= local_entry
->paddr
| (vaddr
& (page_size
- 1));
1187 DPRINTF(GPUTLB
, "Translated %#x -> %#x.\n", vaddr
, paddr
);
1189 pkt
->req
->setPaddr(paddr
);
1191 if (local_entry
->uncacheable
)
1192 pkt
->req
->setFlags(Request::UNCACHEABLE
);
1195 // This is used for atomic translations. Need to
1196 // make it all happen during the same cycle.
// Functional (atomic-mode) translation entry point on the CPU-side port.
// NOTE(review): this chunk is a lossy extraction — several original lines
// (braces, if-conditions, trailing arguments) are missing between the
// numbered fragments below; the surviving tokens are kept byte-identical
// and only comments are added.
1198 GpuTLB::CpuSidePort::recvFunctional(PacketPtr pkt
)
// Recover the translation bookkeeping the sender attached to the packet.
1200 TranslationState
*sender_state
=
1201 safe_cast
<TranslationState
*>(pkt
->senderState
);
1203 ThreadContext
*tc
= sender_state
->tc
;
// Prefetch translations do not count towards the TLB statistics.
1204 bool update_stats
= !sender_state
->prefetch
;
// Page-align the request's vaddr (the page-size argument to roundDown
// is on a missing line — presumably the TLB page size; TODO confirm).
1206 Addr virt_page_addr
= roundDown(pkt
->req
->getVaddr(),
// Record this access in the per-page footprint statistics.
1210 tlb
->updatePageFootprint(virt_page_addr
);
1212 // do the TLB lookup without updating the stats
1213 bool success
= tlb
->tlbLookup(pkt
->req
, tc
, update_stats
);
1214 tlbOutcome tlb_outcome
= success
? TLB_HIT
: TLB_MISS
;
1216 // functional mode means no coalescing
1217 // global metrics are the same as the local metrics
1219 tlb
->globalNumTLBAccesses
++;
// On a hit, hitLevel records how deep in the TLB hierarchy the request
// travelled (the size of the per-level request-count vector).
1222 sender_state
->hitLevel
= sender_state
->reqCnt
.size();
1223 tlb
->globalNumTLBHits
++;
// Miss path (the enclosing if/else structure is on missing lines).
1229 tlb
->globalNumTLBMisses
++;
1230 if (tlb
->hasMemSidePort
) {
1231 // there is a TLB below -> propagate down the TLB hierarchy
1232 tlb
->memSidePort
[0]->sendFunctional(pkt
);
1233 // If no valid translation from a prefetch, then just return
1234 if (sender_state
->prefetch
&& !pkt
->req
->hasPaddr())
// Last-level TLB: walk the page table ourselves.
1237 // Need to access the page table and update the TLB
1238 DPRINTF(GPUTLB
, "Doing a page walk for address %#x\n",
1241 Process
*p
= tc
->getProcessPtr();
1243 Addr vaddr
= pkt
->req
->getVaddr();
1245 Addr alignedVaddr
= p
->pTable
->pageAlign(vaddr
);
1246 assert(alignedVaddr
== virt_page_addr
);
// First page-table lookup; on failure (non-execute access) try to fix
// up the fault (e.g. demand allocation) and look up again.
1249 const EmulationPageTable::Entry
*pte
=
1250 p
->pTable
->lookup(vaddr
);
1251 if (!pte
&& sender_state
->tlbMode
!= BaseTLB::Execute
&&
1252 p
->fixupFault(vaddr
)) {
1253 pte
= p
->pTable
->lookup(vaddr
);
1256 if (!sender_state
->prefetch
) {
1257 // no PageFaults are permitted after
1258 // the second page table lookup
1261 DPRINTF(GPUTLB
, "Mapping %#x to %#x\n", alignedVaddr
,
// Hand a fresh TLB entry for this page back to the sender.
1264 sender_state
->tlbEntry
=
1265 new TlbEntry(p
->pid(), virt_page_addr
,
1266 pte
->paddr
, false, false);
1268 // If this was a prefetch, then do the normal thing if it
1269 // was a successful translation. Otherwise, send an empty
1270 // TLB entry back so that it can be figured out as empty and
1271 // handled accordingly.
1273 DPRINTF(GPUTLB
, "Mapping %#x to %#x\n", alignedVaddr
,
1276 sender_state
->tlbEntry
=
1277 new TlbEntry(p
->pid(), virt_page_addr
,
1278 pte
->paddr
, false, false);
// Failed prefetch: signal "no translation" with a null entry.
1280 DPRINTF(GPUPrefetch
, "Prefetch failed %#x\n",
1283 sender_state
->tlbEntry
= nullptr;
// Hit path: re-read the entry from this TLB and copy it for the sender
// (further lookup arguments are on missing lines).
1290 DPRINTF(GPUPrefetch
, "Functional Hit for vaddr %#x\n",
1291 tlb
->lookup(pkt
->req
->getVaddr()));
1293 TlbEntry
*entry
= tlb
->lookup(pkt
->req
->getVaddr(),
1298 auto p
= sender_state
->tc
->getProcessPtr();
1299 sender_state
->tlbEntry
=
1300 new TlbEntry(p
->pid(), entry
->vaddr
, entry
->paddr
,
1303 // This is the function that would populate pkt->req with the paddr of
1304 // the translation. But if no translation happens (i.e Prefetch fails)
1305 // then the early returns in the above code will keep this function
1307 tlb
->handleFuncTranslationReturn(pkt
, tlb_outcome
);
1311 GpuTLB::CpuSidePort::recvReqRetry()
1313 // The CPUSidePort never sends anything but replies. No retries
1315 panic("recvReqRetry called");
1319 GpuTLB::CpuSidePort::getAddrRanges() const
1321 // currently not checked by the master
1322 AddrRangeList ranges
;
1328 * MemSidePort receives the packet back.
1329 * We need to call the handleTranslationReturn
1330 * and propagate up the hierarchy.
// NOTE(review): lossy extraction — braces, the return statement, and some
// trailing arguments are on missing lines; code tokens kept byte-identical.
1333 GpuTLB::MemSidePort::recvTimingResp(PacketPtr pkt
)
// Page-align the request's vaddr (page-size argument is on a missing line).
1335 Addr virt_page_addr
= roundDown(pkt
->req
->getVaddr(),
1338 DPRINTF(GPUTLB
, "MemSidePort recvTiming for virt_page_addr %#x\n",
// Find the pending translation-return event registered for this page.
1341 TLBEvent
*tlb_event
= tlb
->translationReturnEvent
[virt_page_addr
];
1343 assert(virt_page_addr
== tlb_event
->getTLBEventVaddr());
// The lower level answered the miss: mark the event and schedule it one
// TLB cycle from now to propagate the result back up.
1345 tlb_event
->updateOutcome(MISS_RETURN
);
1346 tlb
->schedule(tlb_event
, curTick()+tlb
->ticks(1));
1352 GpuTLB::MemSidePort::recvReqRetry()
1354 // No retries should reach the TLB. The retries
1355 // should only reach the TLBCoalescer.
1356 panic("recvReqRetry called");
// Interior of the TLB cleanup routine (the enclosing signature line is
// not visible in this chunk — presumably GpuTLB::cleanup(); TODO confirm).
// Drains cleanupQueue, destroying each page's completed translation-return
// event, then asks the CPU-side ports to retry.
1362 while (!cleanupQueue
.empty()) {
1363 Addr cleanup_addr
= cleanupQueue
.front();
// Remove and free the finished event for this page so a new miss on the
// same page can allocate a fresh one.
1367 TLBEvent
* old_tlb_event
= translationReturnEvent
[cleanup_addr
];
1368 delete old_tlb_event
;
1369 translationReturnEvent
.erase(cleanup_addr
);
1371 // update number of outstanding requests
1375 /** the higher level coalescer should retry if it has
1376 * any pending requests.
1378 for (int i
= 0; i
< cpuSidePort
.size(); ++i
) {
1379 cpuSidePort
[i
]->sendRetryReq();
// Record one access to virt_page_addr in the per-page footprint table
// (TLBFootprint). Tracks reuse distance, access count, last access time
// and — when accessDistance is enabled — the access-distance trace.
// NOTE(review): lossy extraction — braces, the insert()'s second argument
// tail and the else-branch structure are on missing lines; code tokens
// are kept byte-identical.
1384 GpuTLB::updatePageFootprint(Addr virt_page_addr
)
1387 std::pair
<AccessPatternTable::iterator
, bool> ret
;
// Zero-initialized record used when this page is seen for the first time.
1389 AccessInfo tmp_access_info
;
1390 tmp_access_info
.lastTimeAccessed
= 0;
1391 tmp_access_info
.accessesPerPage
= 0;
1392 tmp_access_info
.totalReuseDistance
= 0;
1393 tmp_access_info
.sumDistance
= 0;
1394 tmp_access_info
.meanDistance
= 0;
// map::insert returns {iterator, inserted?}; ret.second tells us whether
// this is the first access to the page.
1396 ret
= TLBFootprint
.insert(AccessPatternTable::value_type(virt_page_addr
,
1399 bool first_page_access
= ret
.second
;
1401 if (first_page_access
) {
// Re-access path (the branch split around here is on missing lines):
// reuse distance is the time elapsed since the page's last access.
1404 int accessed_before
;
1405 accessed_before
= curTick() - ret
.first
->second
.lastTimeAccessed
;
1406 ret
.first
->second
.totalReuseDistance
+= accessed_before
;
// Common bookkeeping for every access.
1409 ret
.first
->second
.accessesPerPage
++;
1410 ret
.first
->second
.lastTimeAccessed
= curTick();
// Optionally log the running local access count; exitCallback() later
// converts these samples into access distances.
1412 if (accessDistance
) {
1413 ret
.first
->second
.localTLBAccesses
1414 .push_back(localNumTLBAccesses
.value());
// End-of-simulation callback: post-processes the TLBFootprint table,
// computing per-page reuse/access-distance statistics and optionally
// dumping them to a per-TLB .csv file, then clears the table.
// NOTE(review): lossy extraction — braces, several statement tails and
// the stddev/avg assignments are on missing lines; code tokens are kept
// byte-identical.
1419 GpuTLB::exitCallback()
1421 std::ostream
*page_stat_file
= nullptr;
1423 if (accessDistance
) {
1425 // print per page statistics to a separate file (.csv format)
1426 // simout is the gem5 output directory (default is m5out or the one
1427 // specified with -d)
1428 page_stat_file
= simout
.create(name().c_str())->stream();
// CSV header row.
1431 *page_stat_file
<< "page,max_access_distance,mean_access_distance, "
1432 << "stddev_distance" << std::endl
;
1435 // update avg. reuse distance footprint
1436 AccessPatternTable::iterator iter
, iter_begin
, iter_end
;
1437 unsigned int sum_avg_reuse_distance_per_page
= 0;
1439 // iterate through all pages seen by this TLB
1440 for (iter
= TLBFootprint
.begin(); iter
!= TLBFootprint
.end(); iter
++) {
// Accumulate each page's average reuse distance (total / accesses).
1441 sum_avg_reuse_distance_per_page
+= iter
->second
.totalReuseDistance
/
1442 iter
->second
.accessesPerPage
;
1444 if (accessDistance
) {
// localTLBAccesses holds running access counts; convert them in place
// into distances relative to the page's first access (tmp).
1445 unsigned int tmp
= iter
->second
.localTLBAccesses
[0];
1446 unsigned int prev
= tmp
;
1448 for (int i
= 0; i
< iter
->second
.localTLBAccesses
.size(); ++i
) {
1453 prev
= iter
->second
.localTLBAccesses
[i
];
1454 // update the localTLBAccesses value
1455 // with the actual difference
1456 iter
->second
.localTLBAccesses
[i
] -= tmp
;
1457 // compute the sum of AccessDistance per page
1458 // used later for mean
1459 iter
->second
.sumDistance
+=
1460 iter
->second
.localTLBAccesses
[i
];
1463 iter
->second
.meanDistance
=
1464 iter
->second
.sumDistance
/ iter
->second
.accessesPerPage
;
1466 // compute std_dev and max (we need a second round because we
1467 // need to know the mean value)
1468 unsigned int max_distance
= 0;
1469 unsigned int stddev_distance
= 0;
1471 for (int i
= 0; i
< iter
->second
.localTLBAccesses
.size(); ++i
) {
1472 unsigned int tmp_access_distance
=
1473 iter
->second
.localTLBAccesses
[i
];
1475 if (tmp_access_distance
> max_distance
) {
1476 max_distance
= tmp_access_distance
;
// Squared deviation from the mean (the 'diff' declaration is on a
// missing line); stddev itself is sqrt(sum/accesses) below.
1480 tmp_access_distance
- iter
->second
.meanDistance
;
1481 stddev_distance
+= pow(diff
, 2);
1486 sqrt(stddev_distance
/iter
->second
.accessesPerPage
);
// Emit one CSV row per page: page (hex), max, mean, stddev.
1488 if (page_stat_file
) {
1489 *page_stat_file
<< std::hex
<< iter
->first
<< ",";
1490 *page_stat_file
<< std::dec
<< max_distance
<< ",";
1491 *page_stat_file
<< std::dec
<< iter
->second
.meanDistance
1493 *page_stat_file
<< std::dec
<< stddev_distance
;
1494 *page_stat_file
<< std::endl
;
1497 // erase the localTLBAccesses array
1498 iter
->second
.localTLBAccesses
.clear();
// Overall average across pages (the assignment's left-hand side is on a
// missing line); guard against division by zero on an empty footprint.
1502 if (!TLBFootprint
.empty()) {
1504 sum_avg_reuse_distance_per_page
/ TLBFootprint
.size();
1507 //clear the TLBFootprint map
1508 TLBFootprint
.clear();
1510 } // namespace X86ISA
1513 X86GPUTLBParams::create()
1515 return new X86ISA::GpuTLB(this);