src/gpu-compute/gpu_tlb.cc

   1 /*
   2  * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
   3  * All rights reserved.
   4  *
   5  * For use for simulation and test purposes only
   6  *
   7  * Redistribution and use in source and binary forms, with or without
   8  * modification, are permitted provided that the following conditions are met:
   9  *
  10  * 1. Redistributions of source code must retain the above copyright notice,
  11  * this list of conditions and the following disclaimer.
  12  *
  13  * 2. Redistributions in binary form must reproduce the above copyright notice,
  14  * this list of conditions and the following disclaimer in the documentation
  15  * and/or other materials provided with the distribution.
  16  *
  17  * 3. Neither the name of the copyright holder nor the names of its contributors
  18  * may be used to endorse or promote products derived from this software
  19  * without specific prior written permission.
  20  *
  21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
  25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  31  * POSSIBILITY OF SUCH DAMAGE.
  32  *
  33  * Author: Lisa Hsu
  34  */
  35
  36 #include "gpu-compute/gpu_tlb.hh"
  37
  38 #include <cmath>
  39 #include <cstring>
  40
  41 #include "arch/x86/faults.hh"
  42 #include "arch/x86/insts/microldstop.hh"
  43 #include "arch/x86/pagetable.hh"
  44 #include "arch/x86/pagetable_walker.hh"
  45 #include "arch/x86/regs/misc.hh"
  46 #include "arch/x86/x86_traits.hh"
  47 #include "base/bitfield.hh"
  48 #include "base/logging.hh"
  49 #include "base/output.hh"
  50 #include "base/trace.hh"
  51 #include "cpu/base.hh"
  52 #include "cpu/thread_context.hh"
  53 #include "debug/GPUPrefetch.hh"
  54 #include "debug/GPUTLB.hh"
  55 #include "mem/packet_access.hh"
  56 #include "mem/page_table.hh"
  57 #include "mem/request.hh"
  58 #include "sim/process.hh"
  59
  60 namespace X86ISA
  61 {
  62
  63     GpuTLB::GpuTLB(const Params *p)
  64         : MemObject(p), configAddress(0), size(p->size),
  65           cleanupEvent([this]{ cleanup(); }, name(), false,
  66                        Event::Maximum_Pri),
  67           exitEvent([this]{ exitCallback(); }, name())
  68     {
  69         assoc = p->assoc;
  70         assert(assoc <= size);
  71         numSets = size/assoc;
  72         allocationPolicy = p->allocationPolicy;
  73         hasMemSidePort = false;
  74         accessDistance = p->accessDistance;
  75         clock = p->clk_domain->clockPeriod();
  76
  77         tlb.assign(size, TlbEntry());
  78
  79         freeList.resize(numSets);
  80         entryList.resize(numSets);
  81
  82         for (int set = 0; set < numSets; ++set) {
  83             for (int way = 0; way < assoc; ++way) {
  84                 int x = set * assoc + way;
  85                 freeList[set].push_back(&tlb.at(x));
  86             }
  87         }
  88
  89         FA = (size == assoc);
  90
  91         /**
  92          * @warning: the set-associative version assumes you have a
  93          * fixed page size of 4KB.
  94          * If the page size is greather than 4KB (as defined in the
  95          * TheISA::PageBytes), then there are various issues w/ the current
  96          * implementation (you'd have the same 8KB page being replicated in
  97          * different sets etc)
  98          */
  99         setMask = numSets - 1;
 100
 101     #if 0
 102         // GpuTLB doesn't yet support full system
 103         walker = p->walker;
 104         walker->setTLB(this);
 105     #endif
 106
 107         maxCoalescedReqs = p->maxOutstandingReqs;
 108
 109         // Do not allow maxCoalescedReqs to be more than the TLB associativity
 110         if (maxCoalescedReqs > assoc) {
 111             maxCoalescedReqs = assoc;
 112             cprintf("Forcing maxCoalescedReqs to %d (TLB assoc.) \n", assoc);
 113         }
 114
 115         outstandingReqs = 0;
 116         hitLatency = p->hitLatency;
 117         missLatency1 = p->missLatency1;
 118         missLatency2 = p->missLatency2;
 119
 120         // create the slave ports based on the number of connected ports
 121         for (size_t i = 0; i < p->port_slave_connection_count; ++i) {
 122             cpuSidePort.push_back(new CpuSidePort(csprintf("%s-port%d",
 123                                   name(), i), this, i));
 124         }
 125
 126         // create the master ports based on the number of connected ports
 127         for (size_t i = 0; i < p->port_master_connection_count; ++i) {
 128             memSidePort.push_back(new MemSidePort(csprintf("%s-port%d",
 129                                   name(), i), this, i));
 130         }
 131     }
 132
 133     // fixme: this is never called?
 134     GpuTLB::~GpuTLB()
 135     {
 136         // make sure all the hash-maps are empty
 137         assert(translationReturnEvent.empty());
 138     }
 139
 140     BaseSlavePort&
 141     GpuTLB::getSlavePort(const std::string &if_name, PortID idx)
 142     {
 143         if (if_name == "slave") {
 144             if (idx >= static_cast<PortID>(cpuSidePort.size())) {
 145                 panic("TLBCoalescer::getSlavePort: unknown index %d\n", idx);
 146             }
 147
 148             return *cpuSidePort[idx];
 149         } else {
 150             panic("TLBCoalescer::getSlavePort: unknown port %s\n", if_name);
 151         }
 152     }
 153
 154     BaseMasterPort&
 155     GpuTLB::getMasterPort(const std::string &if_name, PortID idx)
 156     {
 157         if (if_name == "master") {
 158             if (idx >= static_cast<PortID>(memSidePort.size())) {
 159                 panic("TLBCoalescer::getMasterPort: unknown index %d\n", idx);
 160             }
 161
 162             hasMemSidePort = true;
 163
 164             return *memSidePort[idx];
 165         } else {
 166             panic("TLBCoalescer::getMasterPort: unknown port %s\n", if_name);
 167         }
 168     }
 169
 170     TlbEntry*
 171     GpuTLB::insert(Addr vpn, TlbEntry &entry)
 172     {
 173         TlbEntry *newEntry = nullptr;
 174
 175         /**
 176          * vpn holds the virtual page address
 177          * The least significant bits are simply masked
 178          */
 179         int set = (vpn >> TheISA::PageShift) & setMask;
 180
 181         if (!freeList[set].empty()) {
 182             newEntry = freeList[set].front();
 183             freeList[set].pop_front();
 184         } else {
 185             newEntry = entryList[set].back();
 186             entryList[set].pop_back();
 187         }
 188
 189         *newEntry = entry;
 190         newEntry->vaddr = vpn;
 191         entryList[set].push_front(newEntry);
 192
 193         return newEntry;
 194     }
 195
 196     GpuTLB::EntryList::iterator
 197     GpuTLB::lookupIt(Addr va, bool update_lru)
 198     {
 199         int set = (va >> TheISA::PageShift) & setMask;
 200
 201         if (FA) {
 202             assert(!set);
 203         }
 204
 205         auto entry = entryList[set].begin();
 206         for (; entry != entryList[set].end(); ++entry) {
 207             int page_size = (*entry)->size();
 208
 209             if ((*entry)->vaddr <= va && (*entry)->vaddr + page_size > va) {
 210                 DPRINTF(GPUTLB, "Matched vaddr %#x to entry starting at %#x "
 211                         "with size %#x.\n", va, (*entry)->vaddr, page_size);
 212
 213                 if (update_lru) {
 214                     entryList[set].push_front(*entry);
 215                     entryList[set].erase(entry);
 216                     entry = entryList[set].begin();
 217                 }
 218
 219                 break;
 220             }
 221         }
 222
 223         return entry;
 224     }
 225
 226     TlbEntry*
 227     GpuTLB::lookup(Addr va, bool update_lru)
 228     {
 229         int set = (va >> TheISA::PageShift) & setMask;
 230
 231         auto entry = lookupIt(va, update_lru);
 232
 233         if (entry == entryList[set].end())
 234             return nullptr;
 235         else
 236             return *entry;
 237     }
 238
 239     void
 240     GpuTLB::invalidateAll()
 241     {
 242         DPRINTF(GPUTLB, "Invalidating all entries.\n");
 243
 244         for (int i = 0; i < numSets; ++i) {
 245             while (!entryList[i].empty()) {
 246                 TlbEntry *entry = entryList[i].front();
 247                 entryList[i].pop_front();
 248                 freeList[i].push_back(entry);
 249             }
 250         }
 251     }
 252
 253     void
 254     GpuTLB::setConfigAddress(uint32_t addr)
 255     {
 256         configAddress = addr;
 257     }
 258
 259     void
 260     GpuTLB::invalidateNonGlobal()
 261     {
 262         DPRINTF(GPUTLB, "Invalidating all non global entries.\n");
 263
 264         for (int i = 0; i < numSets; ++i) {
 265             for (auto entryIt = entryList[i].begin();
 266                  entryIt != entryList[i].end();) {
 267                 if (!(*entryIt)->global) {
 268                     freeList[i].push_back(*entryIt);
 269                     entryList[i].erase(entryIt++);
 270                 } else {
 271                     ++entryIt;
 272                 }
 273             }
 274         }
 275     }
 276
 277     void
 278     GpuTLB::demapPage(Addr va, uint64_t asn)
 279     {
 280
 281         int set = (va >> TheISA::PageShift) & setMask;
 282         auto entry = lookupIt(va, false);
 283
 284         if (entry != entryList[set].end()) {
 285             freeList[set].push_back(*entry);
 286             entryList[set].erase(entry);
 287         }
 288     }
 289
 290     Fault
 291     GpuTLB::translateInt(const RequestPtr &req, ThreadContext *tc)
 292     {
 293         DPRINTF(GPUTLB, "Addresses references internal memory.\n");
 294         Addr vaddr = req->getVaddr();
 295         Addr prefix = (vaddr >> 3) & IntAddrPrefixMask;
 296
 297         if (prefix == IntAddrPrefixCPUID) {
 298             panic("CPUID memory space not yet implemented!\n");
 299         } else if (prefix == IntAddrPrefixMSR) {
 300             vaddr = vaddr >> 3;
 301             req->setFlags(Request::MMAPPED_IPR);
 302             Addr regNum = 0;
 303
 304             switch (vaddr & ~IntAddrPrefixMask) {
 305               case 0x10:
 306                 regNum = MISCREG_TSC;
 307                 break;
 308               case 0x1B:
 309                 regNum = MISCREG_APIC_BASE;
 310                 break;
 311               case 0xFE:
 312                 regNum = MISCREG_MTRRCAP;
 313                 break;
 314               case 0x174:
 315                 regNum = MISCREG_SYSENTER_CS;
 316                 break;
 317               case 0x175:
 318                 regNum = MISCREG_SYSENTER_ESP;
 319                 break;
 320               case 0x176:
 321                 regNum = MISCREG_SYSENTER_EIP;
 322                 break;
 323               case 0x179:
 324                 regNum = MISCREG_MCG_CAP;
 325                 break;
 326               case 0x17A:
 327                 regNum = MISCREG_MCG_STATUS;
 328                 break;
 329               case 0x17B:
 330                 regNum = MISCREG_MCG_CTL;
 331                 break;
 332               case 0x1D9:
 333                 regNum = MISCREG_DEBUG_CTL_MSR;
 334                 break;
 335               case 0x1DB:
 336                 regNum = MISCREG_LAST_BRANCH_FROM_IP;
 337                 break;
 338               case 0x1DC:
 339                 regNum = MISCREG_LAST_BRANCH_TO_IP;
 340                 break;
 341               case 0x1DD:
 342                 regNum = MISCREG_LAST_EXCEPTION_FROM_IP;
 343                 break;
 344               case 0x1DE:
 345                 regNum = MISCREG_LAST_EXCEPTION_TO_IP;
 346                 break;
 347               case 0x200:
 348                 regNum = MISCREG_MTRR_PHYS_BASE_0;
 349                 break;
 350               case 0x201:
 351                 regNum = MISCREG_MTRR_PHYS_MASK_0;
 352                 break;
 353               case 0x202:
 354                 regNum = MISCREG_MTRR_PHYS_BASE_1;
 355                 break;
 356               case 0x203:
 357                 regNum = MISCREG_MTRR_PHYS_MASK_1;
 358                 break;
 359               case 0x204:
 360                 regNum = MISCREG_MTRR_PHYS_BASE_2;
 361                 break;
 362               case 0x205:
 363                 regNum = MISCREG_MTRR_PHYS_MASK_2;
 364                 break;
 365               case 0x206:
 366                 regNum = MISCREG_MTRR_PHYS_BASE_3;
 367                 break;
 368               case 0x207:
 369                 regNum = MISCREG_MTRR_PHYS_MASK_3;
 370                 break;
 371               case 0x208:
 372                 regNum = MISCREG_MTRR_PHYS_BASE_4;
 373                 break;
 374               case 0x209:
 375                 regNum = MISCREG_MTRR_PHYS_MASK_4;
 376                 break;
 377               case 0x20A:
 378                 regNum = MISCREG_MTRR_PHYS_BASE_5;
 379                 break;
 380               case 0x20B:
 381                 regNum = MISCREG_MTRR_PHYS_MASK_5;
 382                 break;
 383               case 0x20C:
 384                 regNum = MISCREG_MTRR_PHYS_BASE_6;
 385                 break;
 386               case 0x20D:
 387                 regNum = MISCREG_MTRR_PHYS_MASK_6;
 388                 break;
 389               case 0x20E:
 390                 regNum = MISCREG_MTRR_PHYS_BASE_7;
 391                 break;
 392               case 0x20F:
 393                 regNum = MISCREG_MTRR_PHYS_MASK_7;
 394                 break;
 395               case 0x250:
 396                 regNum = MISCREG_MTRR_FIX_64K_00000;
 397                 break;
 398               case 0x258:
 399                 regNum = MISCREG_MTRR_FIX_16K_80000;
 400                 break;
 401               case 0x259:
 402                 regNum = MISCREG_MTRR_FIX_16K_A0000;
 403                 break;
 404               case 0x268:
 405                 regNum = MISCREG_MTRR_FIX_4K_C0000;
 406                 break;
 407               case 0x269:
 408                 regNum = MISCREG_MTRR_FIX_4K_C8000;
 409                 break;
 410               case 0x26A:
 411                 regNum = MISCREG_MTRR_FIX_4K_D0000;
 412                 break;
 413               case 0x26B:
 414                 regNum = MISCREG_MTRR_FIX_4K_D8000;
 415                 break;
 416               case 0x26C:
 417                 regNum = MISCREG_MTRR_FIX_4K_E0000;
 418                 break;
 419               case 0x26D:
 420                 regNum = MISCREG_MTRR_FIX_4K_E8000;
 421                 break;
 422               case 0x26E:
 423                 regNum = MISCREG_MTRR_FIX_4K_F0000;
 424                 break;
 425               case 0x26F:
 426                 regNum = MISCREG_MTRR_FIX_4K_F8000;
 427                 break;
 428               case 0x277:
 429                 regNum = MISCREG_PAT;
 430                 break;
 431               case 0x2FF:
 432                 regNum = MISCREG_DEF_TYPE;
 433                 break;
 434               case 0x400:
 435                 regNum = MISCREG_MC0_CTL;
 436                 break;
 437               case 0x404:
 438                 regNum = MISCREG_MC1_CTL;
 439                 break;
 440               case 0x408:
 441                 regNum = MISCREG_MC2_CTL;
 442                 break;
 443               case 0x40C:
 444                 regNum = MISCREG_MC3_CTL;
 445                 break;
 446               case 0x410:
 447                 regNum = MISCREG_MC4_CTL;
 448                 break;
 449               case 0x414:
 450                 regNum = MISCREG_MC5_CTL;
 451                 break;
 452               case 0x418:
 453                 regNum = MISCREG_MC6_CTL;
 454                 break;
 455               case 0x41C:
 456                 regNum = MISCREG_MC7_CTL;
 457                 break;
 458               case 0x401:
 459                 regNum = MISCREG_MC0_STATUS;
 460                 break;
 461               case 0x405:
 462                 regNum = MISCREG_MC1_STATUS;
 463                 break;
 464               case 0x409:
 465                 regNum = MISCREG_MC2_STATUS;
 466                 break;
 467               case 0x40D:
 468                 regNum = MISCREG_MC3_STATUS;
 469                 break;
 470               case 0x411:
 471                 regNum = MISCREG_MC4_STATUS;
 472                 break;
 473               case 0x415:
 474                 regNum = MISCREG_MC5_STATUS;
 475                 break;
 476               case 0x419:
 477                 regNum = MISCREG_MC6_STATUS;
 478                 break;
 479               case 0x41D:
 480                 regNum = MISCREG_MC7_STATUS;
 481                 break;
 482               case 0x402:
 483                 regNum = MISCREG_MC0_ADDR;
 484                 break;
 485               case 0x406:
 486                 regNum = MISCREG_MC1_ADDR;
 487                 break;
 488               case 0x40A:
 489                 regNum = MISCREG_MC2_ADDR;
 490                 break;
 491               case 0x40E:
 492                 regNum = MISCREG_MC3_ADDR;
 493                 break;
 494               case 0x412:
 495                 regNum = MISCREG_MC4_ADDR;
 496                 break;
 497               case 0x416:
 498                 regNum = MISCREG_MC5_ADDR;
 499                 break;
 500               case 0x41A:
 501                 regNum = MISCREG_MC6_ADDR;
 502                 break;
 503               case 0x41E:
 504                 regNum = MISCREG_MC7_ADDR;
 505                 break;
 506               case 0x403:
 507                 regNum = MISCREG_MC0_MISC;
 508                 break;
 509               case 0x407:
 510                 regNum = MISCREG_MC1_MISC;
 511                 break;
 512               case 0x40B:
 513                 regNum = MISCREG_MC2_MISC;
 514                 break;
 515               case 0x40F:
 516                 regNum = MISCREG_MC3_MISC;
 517                 break;
 518               case 0x413:
 519                 regNum = MISCREG_MC4_MISC;
 520                 break;
 521               case 0x417:
 522                 regNum = MISCREG_MC5_MISC;
 523                 break;
 524               case 0x41B:
 525                 regNum = MISCREG_MC6_MISC;
 526                 break;
 527               case 0x41F:
 528                 regNum = MISCREG_MC7_MISC;
 529                 break;
 530               case 0xC0000080:
 531                 regNum = MISCREG_EFER;
 532                 break;
 533               case 0xC0000081:
 534                 regNum = MISCREG_STAR;
 535                 break;
 536               case 0xC0000082:
 537                 regNum = MISCREG_LSTAR;
 538                 break;
 539               case 0xC0000083:
 540                 regNum = MISCREG_CSTAR;
 541                 break;
 542               case 0xC0000084:
 543                 regNum = MISCREG_SF_MASK;
 544                 break;
 545               case 0xC0000100:
 546                 regNum = MISCREG_FS_BASE;
 547                 break;
 548               case 0xC0000101:
 549                 regNum = MISCREG_GS_BASE;
 550                 break;
 551               case 0xC0000102:
 552                 regNum = MISCREG_KERNEL_GS_BASE;
 553                 break;
 554               case 0xC0000103:
 555                 regNum = MISCREG_TSC_AUX;
 556                 break;
 557               case 0xC0010000:
 558                 regNum = MISCREG_PERF_EVT_SEL0;
 559                 break;
 560               case 0xC0010001:
 561                 regNum = MISCREG_PERF_EVT_SEL1;
 562                 break;
 563               case 0xC0010002:
 564                 regNum = MISCREG_PERF_EVT_SEL2;
 565                 break;
 566               case 0xC0010003:
 567                 regNum = MISCREG_PERF_EVT_SEL3;
 568                 break;
 569               case 0xC0010004:
 570                 regNum = MISCREG_PERF_EVT_CTR0;
 571                 break;
 572               case 0xC0010005:
 573                 regNum = MISCREG_PERF_EVT_CTR1;
 574                 break;
 575               case 0xC0010006:
 576                 regNum = MISCREG_PERF_EVT_CTR2;
 577                 break;
 578               case 0xC0010007:
 579                 regNum = MISCREG_PERF_EVT_CTR3;
 580                 break;
 581               case 0xC0010010:
 582                 regNum = MISCREG_SYSCFG;
 583                 break;
 584               case 0xC0010016:
 585                 regNum = MISCREG_IORR_BASE0;
 586                 break;
 587               case 0xC0010017:
 588                 regNum = MISCREG_IORR_BASE1;
 589                 break;
 590               case 0xC0010018:
 591                 regNum = MISCREG_IORR_MASK0;
 592                 break;
 593               case 0xC0010019:
 594                 regNum = MISCREG_IORR_MASK1;
 595                 break;
 596               case 0xC001001A:
 597                 regNum = MISCREG_TOP_MEM;
 598                 break;
 599               case 0xC001001D:
 600                 regNum = MISCREG_TOP_MEM2;
 601                 break;
 602               case 0xC0010114:
 603                 regNum = MISCREG_VM_CR;
 604                 break;
 605               case 0xC0010115:
 606                 regNum = MISCREG_IGNNE;
 607                 break;
 608               case 0xC0010116:
 609                 regNum = MISCREG_SMM_CTL;
 610                 break;
 611               case 0xC0010117:
 612                 regNum = MISCREG_VM_HSAVE_PA;
 613                 break;
 614               default:
 615                 return std::make_shared<GeneralProtection>(0);
 616             }
 617             //The index is multiplied by the size of a MiscReg so that
 618             //any memory dependence calculations will not see these as
 619             //overlapping.
 620             req->setPaddr(regNum * sizeof(RegVal));
 621             return NoFault;
 622         } else if (prefix == IntAddrPrefixIO) {
 623             // TODO If CPL > IOPL or in virtual mode, check the I/O permission
 624             // bitmap in the TSS.
 625
 626             Addr IOPort = vaddr & ~IntAddrPrefixMask;
 627             // Make sure the address fits in the expected 16 bit IO address
 628             // space.
 629             assert(!(IOPort & ~0xFFFF));
 630
 631             if (IOPort == 0xCF8 && req->getSize() == 4) {
 632                 req->setFlags(Request::MMAPPED_IPR);
 633                 req->setPaddr(MISCREG_PCI_CONFIG_ADDRESS * sizeof(RegVal));
 634             } else if ((IOPort & ~mask(2)) == 0xCFC) {
 635                 req->setFlags(Request::UNCACHEABLE);
 636
 637                 Addr configAddress =
 638                     tc->readMiscRegNoEffect(MISCREG_PCI_CONFIG_ADDRESS);
 639
 640                 if (bits(configAddress, 31, 31)) {
 641                     req->setPaddr(PhysAddrPrefixPciConfig |
 642                                   mbits(configAddress, 30, 2) |
 643                                   (IOPort & mask(2)));
 644                 } else {
 645                     req->setPaddr(PhysAddrPrefixIO | IOPort);
 646                 }
 647             } else {
 648                 req->setFlags(Request::UNCACHEABLE);
 649                 req->setPaddr(PhysAddrPrefixIO | IOPort);
 650             }
 651             return NoFault;
 652         } else {
 653             panic("Access to unrecognized internal address space %#x.\n",
 654                   prefix);
 655         }
 656     }
 657
 658     /**
 659      * TLB_lookup will only perform a TLB lookup returning true on a TLB hit
 660      * and false on a TLB miss.
 661      * Many of the checks about different modes have been converted to
 662      * assertions, since these parts of the code are not really used.
 663      * On a hit it will update the LRU stack.
 664      */
 665     bool
 666     GpuTLB::tlbLookup(const RequestPtr &req,
 667                       ThreadContext *tc, bool update_stats)
 668     {
 669         bool tlb_hit = false;
 670     #ifndef NDEBUG
 671         uint32_t flags = req->getFlags();
 672         int seg = flags & SegmentFlagMask;
 673     #endif
 674
 675         assert(seg != SEGMENT_REG_MS);
 676         Addr vaddr = req->getVaddr();
 677         DPRINTF(GPUTLB, "TLB Lookup for vaddr %#x.\n", vaddr);
 678         HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
 679
 680         if (m5Reg.prot) {
 681             DPRINTF(GPUTLB, "In protected mode.\n");
 682             // make sure we are in 64-bit mode
 683             assert(m5Reg.mode == LongMode);
 684
 685             // If paging is enabled, do the translation.
 686             if (m5Reg.paging) {
 687                 DPRINTF(GPUTLB, "Paging enabled.\n");
 688                 //update LRU stack on a hit
 689                 TlbEntry *entry = lookup(vaddr, true);
 690
 691                 if (entry)
 692                     tlb_hit = true;
 693
 694                 if (!update_stats) {
 695                     // functional tlb access for memory initialization
 696                     // i.e., memory seeding or instr. seeding -> don't update
 697                     // TLB and stats
 698                     return tlb_hit;
 699                 }
 700
 701                 localNumTLBAccesses++;
 702
 703                 if (!entry) {
 704                     localNumTLBMisses++;
 705                 } else {
 706                     localNumTLBHits++;
 707                 }
 708             }
 709         }
 710
 711         return tlb_hit;
 712     }
 713
 714     Fault
 715     GpuTLB::translate(const RequestPtr &req, ThreadContext *tc,
 716                       Translation *translation, Mode mode,
 717                       bool &delayedResponse, bool timing, int &latency)
 718     {
 719         uint32_t flags = req->getFlags();
 720         int seg = flags & SegmentFlagMask;
 721         bool storeCheck = flags & (StoreCheck << FlagShift);
 722
 723         // If this is true, we're dealing with a request
 724         // to a non-memory address space.
 725         if (seg == SEGMENT_REG_MS) {
 726             return translateInt(req, tc);
 727         }
 728
 729         delayedResponse = false;
 730         Addr vaddr = req->getVaddr();
 731         DPRINTF(GPUTLB, "Translating vaddr %#x.\n", vaddr);
 732
 733         HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
 734
 735         // If protected mode has been enabled...
 736         if (m5Reg.prot) {
 737             DPRINTF(GPUTLB, "In protected mode.\n");
 738             // If we're not in 64-bit mode, do protection/limit checks
 739             if (m5Reg.mode != LongMode) {
 740                 DPRINTF(GPUTLB, "Not in long mode. Checking segment "
 741                         "protection.\n");
 742
 743                 // Check for a null segment selector.
 744                 if (!(seg == SEGMENT_REG_TSG || seg == SYS_SEGMENT_REG_IDTR ||
 745                     seg == SEGMENT_REG_HS || seg == SEGMENT_REG_LS)
 746                     && !tc->readMiscRegNoEffect(MISCREG_SEG_SEL(seg))) {
 747                     return std::make_shared<GeneralProtection>(0);
 748                 }
 749
 750                 bool expandDown = false;
 751                 SegAttr attr = tc->readMiscRegNoEffect(MISCREG_SEG_ATTR(seg));
 752
 753                 if (seg >= SEGMENT_REG_ES && seg <= SEGMENT_REG_HS) {
 754                     if (!attr.writable && (mode == BaseTLB::Write ||
 755                         storeCheck))
 756                         return std::make_shared<GeneralProtection>(0);
 757
 758                     if (!attr.readable && mode == BaseTLB::Read)
 759                         return std::make_shared<GeneralProtection>(0);
 760
 761                     expandDown = attr.expandDown;
 762
 763                 }
 764
 765                 Addr base = tc->readMiscRegNoEffect(MISCREG_SEG_BASE(seg));
 766                 Addr limit = tc->readMiscRegNoEffect(MISCREG_SEG_LIMIT(seg));
 767                 // This assumes we're not in 64 bit mode. If we were, the
 768                 // default address size is 64 bits, overridable to 32.
 769                 int size = 32;
 770                 bool sizeOverride = (flags & (AddrSizeFlagBit << FlagShift));
 771                 SegAttr csAttr = tc->readMiscRegNoEffect(MISCREG_CS_ATTR);
 772
 773                 if ((csAttr.defaultSize && sizeOverride) ||
 774                     (!csAttr.defaultSize && !sizeOverride)) {
 775                     size = 16;
 776                 }
 777
 778                 Addr offset = bits(vaddr - base, size - 1, 0);
 779                 Addr endOffset = offset + req->getSize() - 1;
 780
 781                 if (expandDown) {
 782                     DPRINTF(GPUTLB, "Checking an expand down segment.\n");
 783                     warn_once("Expand down segments are untested.\n");
 784
 785                     if (offset <= limit || endOffset <= limit)
 786                         return std::make_shared<GeneralProtection>(0);
 787                 } else {
 788                     if (offset > limit || endOffset > limit)
 789                         return std::make_shared<GeneralProtection>(0);
 790                 }
 791             }
 792
 793             // If paging is enabled, do the translation.
 794             if (m5Reg.paging) {
 795                 DPRINTF(GPUTLB, "Paging enabled.\n");
 796                 // The vaddr already has the segment base applied.
 797                 TlbEntry *entry = lookup(vaddr);
 798                 localNumTLBAccesses++;
 799
 800                 if (!entry) {
 801                     localNumTLBMisses++;
 802                     if (timing) {
 803                         latency = missLatency1;
 804                     }
 805
 806                     if (FullSystem) {
 807                         fatal("GpuTLB doesn't support full-system mode\n");
 808                     } else {
 809                         DPRINTF(GPUTLB, "Handling a TLB miss for address %#x "
 810                                 "at pc %#x.\n", vaddr, tc->instAddr());
 811
 812                         Process *p = tc->getProcessPtr();
 813                         const EmulationPageTable::Entry *pte =
 814                             p->pTable->lookup(vaddr);
 815
 816                         if (!pte && mode != BaseTLB::Execute) {
 817                             // penalize a "page fault" more
 818                             if (timing)
 819                                 latency += missLatency2;
 820
 821                             if (p->fixupStackFault(vaddr))
 822                                 pte = p->pTable->lookup(vaddr);
 823                         }
 824
 825                         if (!pte) {
 826                             return std::make_shared<PageFault>(vaddr, true,
 827                                                                mode, true,
 828                                                                false);
 829                         } else {
 830                             Addr alignedVaddr = p->pTable->pageAlign(vaddr);
 831
 832                             DPRINTF(GPUTLB, "Mapping %#x to %#x\n",
 833                                     alignedVaddr, pte->paddr);
 834
 835                             TlbEntry gpuEntry(p->pid(), alignedVaddr,
 836                                               pte->paddr, false, false);
 837                             entry = insert(alignedVaddr, gpuEntry);
 838                         }
 839
 840                         DPRINTF(GPUTLB, "Miss was serviced.\n");
 841                     }
 842                 } else {
 843                     localNumTLBHits++;
 844
 845                     if (timing) {
 846                         latency = hitLatency;
 847                     }
 848                 }
 849
 850                 // Do paging protection checks.
 851                 bool inUser = (m5Reg.cpl == 3 &&
 852                                !(flags & (CPL0FlagBit << FlagShift)));
 853
 854                 CR0 cr0 = tc->readMiscRegNoEffect(MISCREG_CR0);
 855                 bool badWrite = (!entry->writable && (inUser || cr0.wp));
 856
 857                 if ((inUser && !entry->user) || (mode == BaseTLB::Write &&
 858                      badWrite)) {
 859                     // The page must have been present to get into the TLB in
 860                     // the first place. We'll assume the reserved bits are
 861                     // fine even though we're not checking them.
 862                     return std::make_shared<PageFault>(vaddr, true, mode,
 863                                                        inUser, false);
 864                 }
 865
 866                 if (storeCheck && badWrite) {
 867                     // This would fault if this were a write, so return a page
 868                     // fault that reflects that happening.
 869                     return std::make_shared<PageFault>(vaddr, true,
 870                                                        BaseTLB::Write,
 871                                                        inUser, false);
 872                 }
 873
 874
 875                 DPRINTF(GPUTLB, "Entry found with paddr %#x, doing protection "
 876                         "checks.\n", entry->paddr);
 877
 878                 int page_size = entry->size();
 879                 Addr paddr = entry->paddr | (vaddr & (page_size - 1));
 880                 DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);
 881                 req->setPaddr(paddr);
 882
 883                 if (entry->uncacheable)
 884                     req->setFlags(Request::UNCACHEABLE);
 885             } else {
 886                 //Use the address which already has segmentation applied.
 887                 DPRINTF(GPUTLB, "Paging disabled.\n");
 888                 DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, vaddr);
 889                 req->setPaddr(vaddr);
 890             }
 891         } else {
 892             // Real mode
 893             DPRINTF(GPUTLB, "In real mode.\n");
 894             DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, vaddr);
 895             req->setPaddr(vaddr);
 896         }
 897
 898         // Check for an access to the local APIC
 899         if (FullSystem) {
 900             LocalApicBase localApicBase =
 901                 tc->readMiscRegNoEffect(MISCREG_APIC_BASE);
 902
 903             Addr baseAddr = localApicBase.base * PageBytes;
 904             Addr paddr = req->getPaddr();
 905
 906             if (baseAddr <= paddr && baseAddr + PageBytes > paddr) {
 907                 // Force the access to be uncacheable.
 908                 req->setFlags(Request::UNCACHEABLE);
 909                 req->setPaddr(x86LocalAPICAddress(tc->contextId(),
 910                                                   paddr - baseAddr));
 911             }
 912         }
 913
 914         return NoFault;
 915     };
 916
 917     Fault
 918     GpuTLB::translateAtomic(const RequestPtr &req, ThreadContext *tc,
 919                             Mode mode, int &latency)
 920     {
 921         bool delayedResponse;
 922
 923         return GpuTLB::translate(req, tc, nullptr, mode, delayedResponse, false,
 924                                  latency);
 925     }
 926
 927     void
 928     GpuTLB::translateTiming(const RequestPtr &req, ThreadContext *tc,
 929             Translation *translation, Mode mode, int &latency)
 930     {
 931         bool delayedResponse;
 932         assert(translation);
 933
 934         Fault fault = GpuTLB::translate(req, tc, translation, mode,
 935                                         delayedResponse, true, latency);
 936
 937         if (!delayedResponse)
 938             translation->finish(fault, req, tc, mode);
 939     }
 940
 941     Walker*
 942     GpuTLB::getWalker()
 943     {
 944         return walker;
 945     }
 946
 947
    /**
     * Checkpoint-out hook. The body is empty, so this TLB writes nothing
     * to the checkpoint.
     *
     * @param cp checkpoint output handle (unused)
     */
    void
    GpuTLB::serialize(CheckpointOut &cp) const
    {
    }
 952
    /**
     * Checkpoint-in hook. The body is empty, so this TLB restores nothing
     * from the checkpoint.
     *
     * @param cp checkpoint input handle (unused)
     */
    void
    GpuTLB::unserialize(CheckpointIn &cp)
    {
    }
 957
 958     void
 959     GpuTLB::regStats()
 960     {
 961         MemObject::regStats();
 962
 963         localNumTLBAccesses
 964             .name(name() + ".local_TLB_accesses")
 965             .desc("Number of TLB accesses")
 966             ;
 967
 968         localNumTLBHits
 969             .name(name() + ".local_TLB_hits")
 970             .desc("Number of TLB hits")
 971             ;
 972
 973         localNumTLBMisses
 974             .name(name() + ".local_TLB_misses")
 975             .desc("Number of TLB misses")
 976             ;
 977
 978         localTLBMissRate
 979             .name(name() + ".local_TLB_miss_rate")
 980             .desc("TLB miss rate")
 981             ;
 982
 983         accessCycles
 984             .name(name() + ".access_cycles")
 985             .desc("Cycles spent accessing this TLB level")
 986             ;
 987
 988         pageTableCycles
 989             .name(name() + ".page_table_cycles")
 990             .desc("Cycles spent accessing the page table")
 991             ;
 992
 993         localTLBMissRate = 100 * localNumTLBMisses / localNumTLBAccesses;
 994
 995         numUniquePages
 996             .name(name() + ".unique_pages")
 997             .desc("Number of unique pages touched")
 998             ;
 999
1000         localCycles
1001             .name(name() + ".local_cycles")
1002             .desc("Number of cycles spent in queue for all incoming reqs")
1003             ;
1004
1005         localLatency
1006             .name(name() + ".local_latency")
1007             .desc("Avg. latency over incoming coalesced reqs")
1008             ;
1009
1010         localLatency = localCycles / localNumTLBAccesses;
1011
1012         globalNumTLBAccesses
1013             .name(name() + ".global_TLB_accesses")
1014             .desc("Number of TLB accesses")
1015             ;
1016
1017         globalNumTLBHits
1018             .name(name() + ".global_TLB_hits")
1019             .desc("Number of TLB hits")
1020             ;
1021
1022         globalNumTLBMisses
1023             .name(name() + ".global_TLB_misses")
1024             .desc("Number of TLB misses")
1025             ;
1026
1027         globalTLBMissRate
1028             .name(name() + ".global_TLB_miss_rate")
1029             .desc("TLB miss rate")
1030             ;
1031
1032         globalTLBMissRate = 100 * globalNumTLBMisses / globalNumTLBAccesses;
1033
1034         avgReuseDistance
1035             .name(name() + ".avg_reuse_distance")
1036             .desc("avg. reuse distance over all pages (in ticks)")
1037             ;
1038
1039     }
1040
1041     /**
1042      * Do the TLB lookup for this coalesced request and schedule
1043      * another event <TLB access latency> cycles later.
1044      */
1045
1046     void
1047     GpuTLB::issueTLBLookup(PacketPtr pkt)
1048     {
1049         assert(pkt);
1050         assert(pkt->senderState);
1051
1052         Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
1053                                         TheISA::PageBytes);
1054
1055         TranslationState *sender_state =
1056                 safe_cast<TranslationState*>(pkt->senderState);
1057
1058         bool update_stats = !sender_state->prefetch;
1059         ThreadContext * tmp_tc = sender_state->tc;
1060
1061         DPRINTF(GPUTLB, "Translation req. for virt. page addr %#x\n",
1062                 virt_page_addr);
1063
1064         int req_cnt = sender_state->reqCnt.back();
1065
1066         if (update_stats) {
1067             accessCycles -= (curTick() * req_cnt);
1068             localCycles -= curTick();
1069             updatePageFootprint(virt_page_addr);
1070             globalNumTLBAccesses += req_cnt;
1071         }
1072
1073         tlbOutcome lookup_outcome = TLB_MISS;
1074         const RequestPtr &tmp_req = pkt->req;
1075
1076         // Access the TLB and figure out if it's a hit or a miss.
1077         bool success = tlbLookup(tmp_req, tmp_tc, update_stats);
1078
1079         if (success) {
1080             lookup_outcome = TLB_HIT;
1081             // Put the entry in SenderState
1082             TlbEntry *entry = lookup(tmp_req->getVaddr(), false);
1083             assert(entry);
1084
1085             auto p = sender_state->tc->getProcessPtr();
1086             sender_state->tlbEntry =
1087                 new TlbEntry(p->pid(), entry->vaddr, entry->paddr,
1088                              false, false);
1089
1090             if (update_stats) {
1091                 // the reqCnt has an entry per level, so its size tells us
1092                 // which level we are in
1093                 sender_state->hitLevel = sender_state->reqCnt.size();
1094                 globalNumTLBHits += req_cnt;
1095             }
1096         } else {
1097             if (update_stats)
1098                 globalNumTLBMisses += req_cnt;
1099         }
1100
1101         /*
1102          * We now know the TLB lookup outcome (if it's a hit or a miss), as well
1103          * as the TLB access latency.
1104          *
1105          * We create and schedule a new TLBEvent which will help us take the
1106          * appropriate actions (e.g., update TLB on a hit, send request to lower
1107          * level TLB on a miss, or start a page walk if this was the last-level
1108          * TLB)
1109          */
1110         TLBEvent *tlb_event =
1111             new TLBEvent(this, virt_page_addr, lookup_outcome, pkt);
1112
1113         if (translationReturnEvent.count(virt_page_addr)) {
1114             panic("Virtual Page Address %#x already has a return event\n",
1115                   virt_page_addr);
1116         }
1117
1118         translationReturnEvent[virt_page_addr] = tlb_event;
1119         assert(tlb_event);
1120
1121         DPRINTF(GPUTLB, "schedule translationReturnEvent @ curTick %d\n",
1122                 curTick() + this->ticks(hitLatency));
1123
1124         schedule(tlb_event, curTick() + this->ticks(hitLatency));
1125     }
1126
1127     GpuTLB::TLBEvent::TLBEvent(GpuTLB* _tlb, Addr _addr, tlbOutcome tlb_outcome,
1128                                PacketPtr _pkt)
1129         : Event(CPU_Tick_Pri), tlb(_tlb), virtPageAddr(_addr),
1130         outcome(tlb_outcome), pkt(_pkt)
1131     {
1132     }
1133
    /**
     * Do paging protection checks. If a page fault is detected, the
     * simulator panics.
     */
1138     void
1139     GpuTLB::pagingProtectionChecks(ThreadContext *tc, PacketPtr pkt,
1140             TlbEntry * tlb_entry, Mode mode)
1141     {
1142         HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
1143         uint32_t flags = pkt->req->getFlags();
1144         bool storeCheck = flags & (StoreCheck << FlagShift);
1145
1146         // Do paging protection checks.
1147         bool inUser = (m5Reg.cpl == 3 && !(flags & (CPL0FlagBit << FlagShift)));
1148         CR0 cr0 = tc->readMiscRegNoEffect(MISCREG_CR0);
1149
1150         bool badWrite = (!tlb_entry->writable && (inUser || cr0.wp));
1151
1152         if ((inUser && !tlb_entry->user) ||
1153             (mode == BaseTLB::Write && badWrite)) {
1154             // The page must have been present to get into the TLB in
1155             // the first place. We'll assume the reserved bits are
1156             // fine even though we're not checking them.
1157             panic("Page fault detected");
1158         }
1159
1160         if (storeCheck && badWrite) {
1161             // This would fault if this were a write, so return a page
1162             // fault that reflects that happening.
1163             panic("Page fault detected");
1164         }
1165     }
1166
    /**
     * handleTranslationReturn is called on a TLB hit,
     * when a TLB miss returns or when a page fault returns.
     * The latter calls handleTranslationReturn with TLB_MISS as the
     * tlbOutcome.
     */
1172     void
1173     GpuTLB::handleTranslationReturn(Addr virt_page_addr, tlbOutcome tlb_outcome,
1174             PacketPtr pkt)
1175     {
1176
1177         assert(pkt);
1178         Addr vaddr = pkt->req->getVaddr();
1179
1180         TranslationState *sender_state =
1181             safe_cast<TranslationState*>(pkt->senderState);
1182
1183         ThreadContext *tc = sender_state->tc;
1184         Mode mode = sender_state->tlbMode;
1185
1186         TlbEntry *local_entry, *new_entry;
1187
1188         if (tlb_outcome == TLB_HIT) {
1189             DPRINTF(GPUTLB, "Translation Done - TLB Hit for addr %#x\n", vaddr);
1190             local_entry = sender_state->tlbEntry;
1191         } else {
1192             DPRINTF(GPUTLB, "Translation Done - TLB Miss for addr %#x\n",
1193                     vaddr);
1194
1195             // We are returning either from a page walk or from a hit at a lower
1196             // TLB level. The senderState should be "carrying" a pointer to the
1197             // correct TLBEntry.
1198             new_entry = sender_state->tlbEntry;
1199             assert(new_entry);
1200             local_entry = new_entry;
1201
1202             if (allocationPolicy) {
1203                 DPRINTF(GPUTLB, "allocating entry w/ addr %#x\n",
1204                         virt_page_addr);
1205
1206                 local_entry = insert(virt_page_addr, *new_entry);
1207             }
1208
1209             assert(local_entry);
1210         }
1211
1212         /**
1213          * At this point the packet carries an up-to-date tlbEntry pointer
1214          * in its senderState.
1215          * Next step is to do the paging protection checks.
1216          */
1217         DPRINTF(GPUTLB, "Entry found with vaddr %#x,  doing protection checks "
1218                 "while paddr was %#x.\n", local_entry->vaddr,
1219                 local_entry->paddr);
1220
1221         pagingProtectionChecks(tc, pkt, local_entry, mode);
1222         int page_size = local_entry->size();
1223         Addr paddr = local_entry->paddr | (vaddr & (page_size - 1));
1224         DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);
1225
1226         // Since this packet will be sent through the cpu side slave port,
1227         // it must be converted to a response pkt if it is not one already
1228         if (pkt->isRequest()) {
1229             pkt->makeTimingResponse();
1230         }
1231
1232         pkt->req->setPaddr(paddr);
1233
1234         if (local_entry->uncacheable) {
1235              pkt->req->setFlags(Request::UNCACHEABLE);
1236         }
1237
1238         //send packet back to coalescer
1239         cpuSidePort[0]->sendTimingResp(pkt);
1240         //schedule cleanup event
1241         cleanupQueue.push(virt_page_addr);
1242
1243         // schedule this only once per cycle.
1244         // The check is required because we might have multiple translations
1245         // returning the same cycle
1246         // this is a maximum priority event and must be on the same cycle
1247         // as the cleanup event in TLBCoalescer to avoid a race with
1248         // IssueProbeEvent caused by TLBCoalescer::MemSidePort::recvReqRetry
1249         if (!cleanupEvent.scheduled())
1250             schedule(cleanupEvent, curTick());
1251     }
1252
1253     /**
1254      * Here we take the appropriate actions based on the result of the
1255      * TLB lookup.
1256      */
1257     void
1258     GpuTLB::translationReturn(Addr virtPageAddr, tlbOutcome outcome,
1259                               PacketPtr pkt)
1260     {
1261         DPRINTF(GPUTLB, "Triggered TLBEvent for addr %#x\n", virtPageAddr);
1262
1263         assert(translationReturnEvent[virtPageAddr]);
1264         assert(pkt);
1265
1266         TranslationState *tmp_sender_state =
1267             safe_cast<TranslationState*>(pkt->senderState);
1268
1269         int req_cnt = tmp_sender_state->reqCnt.back();
1270         bool update_stats = !tmp_sender_state->prefetch;
1271
1272
1273         if (outcome == TLB_HIT) {
1274             handleTranslationReturn(virtPageAddr, TLB_HIT, pkt);
1275
1276             if (update_stats) {
1277                 accessCycles += (req_cnt * curTick());
1278                 localCycles += curTick();
1279             }
1280
1281         } else if (outcome == TLB_MISS) {
1282
1283             DPRINTF(GPUTLB, "This is a TLB miss\n");
1284             if (update_stats) {
1285                 accessCycles += (req_cnt*curTick());
1286                 localCycles += curTick();
1287             }
1288
1289             if (hasMemSidePort) {
1290                 // the one cyle added here represent the delay from when we get
1291                 // the reply back till when we propagate it to the coalescer
1292                 // above.
1293                 if (update_stats) {
1294                     accessCycles += (req_cnt * 1);
1295                     localCycles += 1;
1296                 }
1297
1298                 /**
1299                  * There is a TLB below. Send the coalesced request.
1300                  * We actually send the very first packet of all the
1301                  * pending packets for this virtual page address.
1302                  */
1303                 if (!memSidePort[0]->sendTimingReq(pkt)) {
1304                     DPRINTF(GPUTLB, "Failed sending translation request to "
1305                             "lower level TLB for addr %#x\n", virtPageAddr);
1306
1307                     memSidePort[0]->retries.push_back(pkt);
1308                 } else {
1309                     DPRINTF(GPUTLB, "Sent translation request to lower level "
1310                             "TLB for addr %#x\n", virtPageAddr);
1311                 }
1312             } else {
1313                 //this is the last level TLB. Start a page walk
1314                 DPRINTF(GPUTLB, "Last level TLB - start a page walk for "
1315                         "addr %#x\n", virtPageAddr);
1316
1317                 if (update_stats)
1318                     pageTableCycles -= (req_cnt*curTick());
1319
1320                 TLBEvent *tlb_event = translationReturnEvent[virtPageAddr];
1321                 assert(tlb_event);
1322                 tlb_event->updateOutcome(PAGE_WALK);
1323                 schedule(tlb_event, curTick() + ticks(missLatency2));
1324             }
1325         } else if (outcome == PAGE_WALK) {
1326             if (update_stats)
1327                 pageTableCycles += (req_cnt*curTick());
1328
1329             // Need to access the page table and update the TLB
1330             DPRINTF(GPUTLB, "Doing a page walk for address %#x\n",
1331                     virtPageAddr);
1332
1333             TranslationState *sender_state =
1334                 safe_cast<TranslationState*>(pkt->senderState);
1335
1336             Process *p = sender_state->tc->getProcessPtr();
1337             Addr vaddr = pkt->req->getVaddr();
1338     #ifndef NDEBUG
1339             Addr alignedVaddr = p->pTable->pageAlign(vaddr);
1340             assert(alignedVaddr == virtPageAddr);
1341     #endif
1342             const EmulationPageTable::Entry *pte = p->pTable->lookup(vaddr);
1343             if (!pte && sender_state->tlbMode != BaseTLB::Execute &&
1344                     p->fixupStackFault(vaddr)) {
1345                 pte = p->pTable->lookup(vaddr);
1346             }
1347
1348             if (pte) {
1349                 DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
1350                         pte->paddr);
1351
1352                 sender_state->tlbEntry =
1353                     new TlbEntry(p->pid(), virtPageAddr, pte->paddr, false,
1354                                  false);
1355             } else {
1356                 sender_state->tlbEntry = nullptr;
1357             }
1358
1359             handleTranslationReturn(virtPageAddr, TLB_MISS, pkt);
1360         } else if (outcome == MISS_RETURN) {
1361             /** we add an extra cycle in the return path of the translation
1362              * requests in between the various TLB levels.
1363              */
1364             handleTranslationReturn(virtPageAddr, TLB_MISS, pkt);
1365         } else {
1366             panic("Unexpected TLB outcome %d", outcome);
1367         }
1368     }
1369
1370     void
1371     GpuTLB::TLBEvent::process()
1372     {
1373         tlb->translationReturn(virtPageAddr, outcome, pkt);
1374     }
1375
1376     const char*
1377     GpuTLB::TLBEvent::description() const
1378     {
1379         return "trigger translationDoneEvent";
1380     }
1381
1382     void
1383     GpuTLB::TLBEvent::updateOutcome(tlbOutcome _outcome)
1384     {
1385         outcome = _outcome;
1386     }
1387
1388     Addr
1389     GpuTLB::TLBEvent::getTLBEventVaddr()
1390     {
1391         return virtPageAddr;
1392     }
1393
    /*
     * recvTimingReq receives a coalesced timing request from a TLBCoalescer
     * and calls issueTLBLookup().
     * It only rejects the packet if the TLB has already reached its
     * maximum number of outstanding requests.
     */
1400     bool
1401     GpuTLB::CpuSidePort::recvTimingReq(PacketPtr pkt)
1402     {
1403         if (tlb->outstandingReqs < tlb->maxCoalescedReqs) {
1404             tlb->issueTLBLookup(pkt);
1405             // update number of outstanding translation requests
1406             tlb->outstandingReqs++;
1407             return true;
1408          } else {
1409             DPRINTF(GPUTLB, "Reached maxCoalescedReqs number %d\n",
1410                     tlb->outstandingReqs);
1411             return false;
1412          }
1413     }
1414
1415     /**
1416      * handleFuncTranslationReturn is called on a TLB hit,
1417      * when a TLB miss returns or when a page fault returns.
1418      * It updates LRU, inserts the TLB entry on a miss
1419      * depending on the allocation policy and does the required
1420      * protection checks. It does NOT create a new packet to
1421      * update the packet's addr; this is done in hsail-gpu code.
1422      */
1423     void
1424     GpuTLB::handleFuncTranslationReturn(PacketPtr pkt, tlbOutcome tlb_outcome)
1425     {
1426         TranslationState *sender_state =
1427             safe_cast<TranslationState*>(pkt->senderState);
1428
1429         ThreadContext *tc = sender_state->tc;
1430         Mode mode = sender_state->tlbMode;
1431         Addr vaddr = pkt->req->getVaddr();
1432
1433         TlbEntry *local_entry, *new_entry;
1434
1435         if (tlb_outcome == TLB_HIT) {
1436             DPRINTF(GPUTLB, "Functional Translation Done - TLB hit for addr "
1437                     "%#x\n", vaddr);
1438
1439             local_entry = sender_state->tlbEntry;
1440         } else {
1441             DPRINTF(GPUTLB, "Functional Translation Done - TLB miss for addr "
1442                     "%#x\n", vaddr);
1443
1444             // We are returning either from a page walk or from a hit at a lower
1445             // TLB level. The senderState should be "carrying" a pointer to the
1446             // correct TLBEntry.
1447             new_entry = sender_state->tlbEntry;
1448             assert(new_entry);
1449             local_entry = new_entry;
1450
1451             if (allocationPolicy) {
1452                 Addr virt_page_addr = roundDown(vaddr, TheISA::PageBytes);
1453
1454                 DPRINTF(GPUTLB, "allocating entry w/ addr %#x\n",
1455                         virt_page_addr);
1456
1457                 local_entry = insert(virt_page_addr, *new_entry);
1458             }
1459
1460             assert(local_entry);
1461         }
1462
1463         DPRINTF(GPUTLB, "Entry found with vaddr %#x, doing protection checks "
1464                 "while paddr was %#x.\n", local_entry->vaddr,
1465                 local_entry->paddr);
1466
1467         /**
1468          * Do paging checks if it's a normal functional access.  If it's for a
1469          * prefetch, then sometimes you can try to prefetch something that
1470          * won't pass protection. We don't actually want to fault becuase there
1471          * is no demand access to deem this a violation.  Just put it in the
1472          * TLB and it will fault if indeed a future demand access touches it in
1473          * violation.
1474          *
1475          * This feature could be used to explore security issues around
1476          * speculative memory accesses.
1477          */
1478         if (!sender_state->prefetch && sender_state->tlbEntry)
1479             pagingProtectionChecks(tc, pkt, local_entry, mode);
1480
1481         int page_size = local_entry->size();
1482         Addr paddr = local_entry->paddr | (vaddr & (page_size - 1));
1483         DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);
1484
1485         pkt->req->setPaddr(paddr);
1486
1487         if (local_entry->uncacheable)
1488              pkt->req->setFlags(Request::UNCACHEABLE);
1489     }
1490
1491     // This is used for atomic translations. Need to
1492     // make it all happen during the same cycle.
1493     void
1494     GpuTLB::CpuSidePort::recvFunctional(PacketPtr pkt)
1495     {
1496         TranslationState *sender_state =
1497             safe_cast<TranslationState*>(pkt->senderState);
1498
1499         ThreadContext *tc = sender_state->tc;
1500         bool update_stats = !sender_state->prefetch;
1501
1502         Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
1503                                         TheISA::PageBytes);
1504
1505         if (update_stats)
1506             tlb->updatePageFootprint(virt_page_addr);
1507
1508         // do the TLB lookup without updating the stats
1509         bool success = tlb->tlbLookup(pkt->req, tc, update_stats);
1510         tlbOutcome tlb_outcome = success ? TLB_HIT : TLB_MISS;
1511
1512         // functional mode means no coalescing
1513         // global metrics are the same as the local metrics
1514         if (update_stats) {
1515             tlb->globalNumTLBAccesses++;
1516
1517             if (success) {
1518                 sender_state->hitLevel = sender_state->reqCnt.size();
1519                 tlb->globalNumTLBHits++;
1520             }
1521         }
1522
1523         if (!success) {
1524             if (update_stats)
1525                 tlb->globalNumTLBMisses++;
1526             if (tlb->hasMemSidePort) {
1527                 // there is a TLB below -> propagate down the TLB hierarchy
1528                 tlb->memSidePort[0]->sendFunctional(pkt);
1529                 // If no valid translation from a prefetch, then just return
1530                 if (sender_state->prefetch && !pkt->req->hasPaddr())
1531                     return;
1532             } else {
1533                 // Need to access the page table and update the TLB
1534                 DPRINTF(GPUTLB, "Doing a page walk for address %#x\n",
1535                         virt_page_addr);
1536
1537                 Process *p = tc->getProcessPtr();
1538
1539                 Addr vaddr = pkt->req->getVaddr();
1540     #ifndef NDEBUG
1541                 Addr alignedVaddr = p->pTable->pageAlign(vaddr);
1542                 assert(alignedVaddr == virt_page_addr);
1543     #endif
1544
1545                 const EmulationPageTable::Entry *pte =
1546                         p->pTable->lookup(vaddr);
1547                 if (!pte && sender_state->tlbMode != BaseTLB::Execute &&
1548                         p->fixupStackFault(vaddr)) {
1549                     pte = p->pTable->lookup(vaddr);
1550                 }
1551
1552                 if (!sender_state->prefetch) {
1553                     // no PageFaults are permitted after
1554                     // the second page table lookup
1555                     assert(pte);
1556
1557                     DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
1558                             pte->paddr);
1559
1560                     sender_state->tlbEntry =
1561                         new TlbEntry(p->pid(), virt_page_addr,
1562                                      pte->paddr, false, false);
1563                 } else {
1564                     // If this was a prefetch, then do the normal thing if it
1565                     // was a successful translation.  Otherwise, send an empty
1566                     // TLB entry back so that it can be figured out as empty and
1567                     // handled accordingly.
1568                     if (pte) {
1569                         DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
1570                                 pte->paddr);
1571
1572                         sender_state->tlbEntry =
1573                             new TlbEntry(p->pid(), virt_page_addr,
1574                                          pte->paddr, false, false);
1575                     } else {
1576                         DPRINTF(GPUPrefetch, "Prefetch failed %#x\n",
1577                                 alignedVaddr);
1578
1579                         sender_state->tlbEntry = nullptr;
1580
1581                         return;
1582                     }
1583                 }
1584             }
1585         } else {
1586             DPRINTF(GPUPrefetch, "Functional Hit for vaddr %#x\n",
1587                     tlb->lookup(pkt->req->getVaddr()));
1588
1589             TlbEntry *entry = tlb->lookup(pkt->req->getVaddr(),
1590                                              update_stats);
1591
1592             assert(entry);
1593
1594             auto p = sender_state->tc->getProcessPtr();
1595             sender_state->tlbEntry =
1596                 new TlbEntry(p->pid(), entry->vaddr, entry->paddr,
1597                              false, false);
1598         }
1599         // This is the function that would populate pkt->req with the paddr of
1600         // the translation. But if no translation happens (i.e Prefetch fails)
1601         // then the early returns in the above code wiill keep this function
1602         // from executing.
1603         tlb->handleFuncTranslationReturn(pkt, tlb_outcome);
1604     }
1605
1606     void
1607     GpuTLB::CpuSidePort::recvReqRetry()
1608     {
1609         // The CPUSidePort never sends anything but replies. No retries
1610         // expected.
1611         panic("recvReqRetry called");
1612     }
1613
1614     AddrRangeList
1615     GpuTLB::CpuSidePort::getAddrRanges() const
1616     {
1617         // currently not checked by the master
1618         AddrRangeList ranges;
1619
1620         return ranges;
1621     }
1622
1623     /**
1624      * MemSidePort receives the packet back.
1625      * We need to call the handleTranslationReturn
1626      * and propagate up the hierarchy.
1627      */
1628     bool
1629     GpuTLB::MemSidePort::recvTimingResp(PacketPtr pkt)
1630     {
1631         Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
1632                                         TheISA::PageBytes);
1633
1634         DPRINTF(GPUTLB, "MemSidePort recvTiming for virt_page_addr %#x\n",
1635                 virt_page_addr);
1636
1637         TLBEvent *tlb_event = tlb->translationReturnEvent[virt_page_addr];
1638         assert(tlb_event);
1639         assert(virt_page_addr == tlb_event->getTLBEventVaddr());
1640
1641         tlb_event->updateOutcome(MISS_RETURN);
1642         tlb->schedule(tlb_event, curTick()+tlb->ticks(1));
1643
1644         return true;
1645     }
1646
1647     void
1648     GpuTLB::MemSidePort::recvReqRetry()
1649     {
1650         // No retries should reach the TLB. The retries
1651         // should only reach the TLBCoalescer.
1652         panic("recvReqRetry called");
1653     }
1654
1655     void
1656     GpuTLB::cleanup()
1657     {
1658         while (!cleanupQueue.empty()) {
1659             Addr cleanup_addr = cleanupQueue.front();
1660             cleanupQueue.pop();
1661
1662             // delete TLBEvent
1663             TLBEvent * old_tlb_event = translationReturnEvent[cleanup_addr];
1664             delete old_tlb_event;
1665             translationReturnEvent.erase(cleanup_addr);
1666
1667             // update number of outstanding requests
1668             outstandingReqs--;
1669         }
1670
1671         /** the higher level coalescer should retry if it has
1672          * any pending requests.
1673          */
1674         for (int i = 0; i < cpuSidePort.size(); ++i) {
1675             cpuSidePort[i]->sendRetryReq();
1676         }
1677     }
1678
1679     void
1680     GpuTLB::updatePageFootprint(Addr virt_page_addr)
1681     {
1682
1683         std::pair<AccessPatternTable::iterator, bool> ret;
1684
1685         AccessInfo tmp_access_info;
1686         tmp_access_info.lastTimeAccessed = 0;
1687         tmp_access_info.accessesPerPage = 0;
1688         tmp_access_info.totalReuseDistance = 0;
1689         tmp_access_info.sumDistance = 0;
1690         tmp_access_info.meanDistance = 0;
1691
1692         ret = TLBFootprint.insert(AccessPatternTable::value_type(virt_page_addr,
1693                                   tmp_access_info));
1694
1695         bool first_page_access = ret.second;
1696
1697         if (first_page_access) {
1698             numUniquePages++;
1699         } else  {
1700             int accessed_before;
1701             accessed_before  = curTick() - ret.first->second.lastTimeAccessed;
1702             ret.first->second.totalReuseDistance += accessed_before;
1703         }
1704
1705         ret.first->second.accessesPerPage++;
1706         ret.first->second.lastTimeAccessed = curTick();
1707
1708         if (accessDistance) {
1709             ret.first->second.localTLBAccesses
1710                 .push_back(localNumTLBAccesses.value());
1711         }
1712     }
1713
1714     void
1715     GpuTLB::exitCallback()
1716     {
1717         std::ostream *page_stat_file = nullptr;
1718
1719         if (accessDistance) {
1720
1721             // print per page statistics to a separate file (.csv format)
1722             // simout is the gem5 output directory (default is m5out or the one
1723             // specified with -d
1724             page_stat_file = simout.create(name().c_str())->stream();
1725
1726             // print header
1727             *page_stat_file << "page,max_access_distance,mean_access_distance, "
1728                             << "stddev_distance" << std::endl;
1729         }
1730
1731         // update avg. reuse distance footprint
1732         AccessPatternTable::iterator iter, iter_begin, iter_end;
1733         unsigned int sum_avg_reuse_distance_per_page = 0;
1734
1735         // iterate through all pages seen by this TLB
1736         for (iter = TLBFootprint.begin(); iter != TLBFootprint.end(); iter++) {
1737             sum_avg_reuse_distance_per_page += iter->second.totalReuseDistance /
1738                                                iter->second.accessesPerPage;
1739
1740             if (accessDistance) {
1741                 unsigned int tmp = iter->second.localTLBAccesses[0];
1742                 unsigned int prev = tmp;
1743
1744                 for (int i = 0; i < iter->second.localTLBAccesses.size(); ++i) {
1745                     if (i) {
1746                         tmp = prev + 1;
1747                     }
1748
1749                     prev = iter->second.localTLBAccesses[i];
1750                     // update the localTLBAccesses value
1751                     // with the actual differece
1752                     iter->second.localTLBAccesses[i] -= tmp;
1753                     // compute the sum of AccessDistance per page
1754                     // used later for mean
1755                     iter->second.sumDistance +=
1756                         iter->second.localTLBAccesses[i];
1757                 }
1758
1759                 iter->second.meanDistance =
1760                     iter->second.sumDistance / iter->second.accessesPerPage;
1761
1762                 // compute std_dev and max  (we need a second round because we
1763                 // need to know the mean value
1764                 unsigned int max_distance = 0;
1765                 unsigned int stddev_distance = 0;
1766
1767                 for (int i = 0; i < iter->second.localTLBAccesses.size(); ++i) {
1768                     unsigned int tmp_access_distance =
1769                         iter->second.localTLBAccesses[i];
1770
1771                     if (tmp_access_distance > max_distance) {
1772                         max_distance = tmp_access_distance;
1773                     }
1774
1775                     unsigned int diff =
1776                         tmp_access_distance - iter->second.meanDistance;
1777                     stddev_distance += pow(diff, 2);
1778
1779                 }
1780
1781                 stddev_distance =
1782                     sqrt(stddev_distance/iter->second.accessesPerPage);
1783
1784                 if (page_stat_file) {
1785                     *page_stat_file << std::hex << iter->first << ",";
1786                     *page_stat_file << std::dec << max_distance << ",";
1787                     *page_stat_file << std::dec << iter->second.meanDistance
1788                                     << ",";
1789                     *page_stat_file << std::dec << stddev_distance;
1790                     *page_stat_file << std::endl;
1791                 }
1792
1793                 // erase the localTLBAccesses array
1794                 iter->second.localTLBAccesses.clear();
1795             }
1796         }
1797
1798         if (!TLBFootprint.empty()) {
1799             avgReuseDistance =
1800                 sum_avg_reuse_distance_per_page / TLBFootprint.size();
1801         }
1802
1803         //clear the TLBFootprint map
1804         TLBFootprint.clear();
1805     }
1806 } // namespace X86ISA
1807
1808 X86ISA::GpuTLB*
1809 X86GPUTLBParams::create()
1810 {
1811     return new X86ISA::GpuTLB(this);
1812 }
1813