1 /*
2 * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * For use for simulation and test purposes only
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright notice,
11 * this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright notice,
14 * this list of conditions and the following disclaimer in the documentation
15 * and/or other materials provided with the distribution.
16 *
17 * 3. Neither the name of the copyright holder nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 *
33 * Author: Lisa Hsu
34 */
35
36 #include "gpu-compute/gpu_tlb.hh"
37
38 #include <cmath>
39 #include <cstring>
40
41 #include "arch/x86/faults.hh"
42 #include "arch/x86/insts/microldstop.hh"
43 #include "arch/x86/pagetable.hh"
44 #include "arch/x86/pagetable_walker.hh"
45 #include "arch/x86/regs/misc.hh"
46 #include "arch/x86/x86_traits.hh"
47 #include "base/bitfield.hh"
48 #include "base/logging.hh"
49 #include "base/output.hh"
50 #include "base/trace.hh"
51 #include "cpu/base.hh"
52 #include "cpu/thread_context.hh"
53 #include "debug/GPUPrefetch.hh"
54 #include "debug/GPUTLB.hh"
55 #include "mem/packet_access.hh"
56 #include "mem/page_table.hh"
57 #include "mem/request.hh"
58 #include "sim/process.hh"
59
60 namespace X86ISA
61 {
62
63 GpuTLB::GpuTLB(const Params *p)
64 : ClockedObject(p), configAddress(0), size(p->size),
65 cleanupEvent([this]{ cleanup(); }, name(), false,
66 Event::Maximum_Pri),
67 exitEvent([this]{ exitCallback(); }, name())
68 {
69 assoc = p->assoc;
70 assert(assoc <= size);
71 numSets = size/assoc;
72 allocationPolicy = p->allocationPolicy;
73 hasMemSidePort = false;
74 accessDistance = p->accessDistance;
75 clock = p->clk_domain->clockPeriod();
76
77 tlb.assign(size, TlbEntry());
78
79 freeList.resize(numSets);
80 entryList.resize(numSets);
81
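    // freeList[set] holds the currently unused entries of each set, while
    // entryList[set] is kept in LRU order by lookupIt()/insert(): the most
    // recently used entry sits at the front and victims are taken from the
    // back.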
82 for (int set = 0; set < numSets; ++set) {
83 for (int way = 0; way < assoc; ++way) {
84 int x = set * assoc + way;
85 freeList[set].push_back(&tlb.at(x));
86 }
87 }
88
89 FA = (size == assoc);
90
91 /**
92 * @warning: the set-associative version assumes you have a
93 * fixed page size of 4KB.
 94          * If the page size is greater than 4KB (as defined by
 95          * TheISA::PageBytes), then there are various issues with the current
 96          * implementation (e.g., the same 8KB page would be replicated in
 97          * different sets).
98 */
99 setMask = numSets - 1;
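    // The set index is computed as (vaddr >> TheISA::PageShift) & setMask
    // (see lookupIt()/insert()). For example, with size = 64 and assoc = 8,
    // numSets = 8 and setMask = 0x7, so bits [14:12] of the virtual address
    // select the set for 4KB pages.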
100
101 maxCoalescedReqs = p->maxOutstandingReqs;
102
103 // Do not allow maxCoalescedReqs to be more than the TLB associativity
104 if (maxCoalescedReqs > assoc) {
105 maxCoalescedReqs = assoc;
106 cprintf("Forcing maxCoalescedReqs to %d (TLB assoc.) \n", assoc);
107 }
108
109 outstandingReqs = 0;
110 hitLatency = p->hitLatency;
111 missLatency1 = p->missLatency1;
112 missLatency2 = p->missLatency2;
113
114 // create the slave ports based on the number of connected ports
115 for (size_t i = 0; i < p->port_slave_connection_count; ++i) {
116 cpuSidePort.push_back(new CpuSidePort(csprintf("%s-port%d",
117 name(), i), this, i));
118 }
119
120 // create the master ports based on the number of connected ports
121 for (size_t i = 0; i < p->port_master_connection_count; ++i) {
122 memSidePort.push_back(new MemSidePort(csprintf("%s-port%d",
123 name(), i), this, i));
124 }
125 }
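// A minimal Python-side configuration sketch (assuming the parameter names
// exposed by the X86GPUTLB SimObject match the accessors used above, e.g.
// size, assoc, hitLatency, missLatency1, missLatency2, maxOutstandingReqs):
//
//     tlb = X86GPUTLB(size=64, assoc=8, hitLatency=2,
//                     missLatency1=5, missLatency2=100)
//
// Setting size == assoc makes the TLB fully associative (see FA above).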
126
127 // fixme: this is never called?
128 GpuTLB::~GpuTLB()
129 {
130 // make sure all the hash-maps are empty
131 assert(translationReturnEvent.empty());
132 }
133
134 Port &
135 GpuTLB::getPort(const std::string &if_name, PortID idx)
136 {
137 if (if_name == "slave") {
138 if (idx >= static_cast<PortID>(cpuSidePort.size())) {
139 panic("TLBCoalescer::getPort: unknown index %d\n", idx);
140 }
141
142 return *cpuSidePort[idx];
143 } else if (if_name == "master") {
144 if (idx >= static_cast<PortID>(memSidePort.size())) {
145 panic("TLBCoalescer::getPort: unknown index %d\n", idx);
146 }
147
148 hasMemSidePort = true;
149
150 return *memSidePort[idx];
151 } else {
152 panic("TLBCoalescer::getPort: unknown port %s\n", if_name);
153 }
154 }
155
156 TlbEntry*
157 GpuTLB::insert(Addr vpn, TlbEntry &entry)
158 {
159 TlbEntry *newEntry = nullptr;
160
161 /**
162 * vpn holds the virtual page address
163 * The least significant bits are simply masked
164 */
165 int set = (vpn >> TheISA::PageShift) & setMask;
166
167 if (!freeList[set].empty()) {
168 newEntry = freeList[set].front();
169 freeList[set].pop_front();
170 } else {
171 newEntry = entryList[set].back();
172 entryList[set].pop_back();
173 }
174
175 *newEntry = entry;
176 newEntry->vaddr = vpn;
177 entryList[set].push_front(newEntry);
178
179 return newEntry;
180 }
181
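/**
 * lookupIt returns an iterator to the entry matching va in the selected
 * set (or end() if there is no match). When update_lru is set, the
 * matching entry is moved to the front of the set's entryList so that
 * insert() evicts from the back, i.e., the least recently used entry.
 */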
182 GpuTLB::EntryList::iterator
183 GpuTLB::lookupIt(Addr va, bool update_lru)
184 {
185 int set = (va >> TheISA::PageShift) & setMask;
186
187 if (FA) {
188 assert(!set);
189 }
190
191 auto entry = entryList[set].begin();
192 for (; entry != entryList[set].end(); ++entry) {
193 int page_size = (*entry)->size();
194
195 if ((*entry)->vaddr <= va && (*entry)->vaddr + page_size > va) {
196 DPRINTF(GPUTLB, "Matched vaddr %#x to entry starting at %#x "
197 "with size %#x.\n", va, (*entry)->vaddr, page_size);
198
199 if (update_lru) {
200 entryList[set].push_front(*entry);
201 entryList[set].erase(entry);
202 entry = entryList[set].begin();
203 }
204
205 break;
206 }
207 }
208
209 return entry;
210 }
211
212 TlbEntry*
213 GpuTLB::lookup(Addr va, bool update_lru)
214 {
215 int set = (va >> TheISA::PageShift) & setMask;
216
217 auto entry = lookupIt(va, update_lru);
218
219 if (entry == entryList[set].end())
220 return nullptr;
221 else
222 return *entry;
223 }
224
225 void
226 GpuTLB::invalidateAll()
227 {
228 DPRINTF(GPUTLB, "Invalidating all entries.\n");
229
230 for (int i = 0; i < numSets; ++i) {
231 while (!entryList[i].empty()) {
232 TlbEntry *entry = entryList[i].front();
233 entryList[i].pop_front();
234 freeList[i].push_back(entry);
235 }
236 }
237 }
238
239 void
240 GpuTLB::setConfigAddress(uint32_t addr)
241 {
242 configAddress = addr;
243 }
244
245 void
246 GpuTLB::invalidateNonGlobal()
247 {
248 DPRINTF(GPUTLB, "Invalidating all non global entries.\n");
249
250 for (int i = 0; i < numSets; ++i) {
251 for (auto entryIt = entryList[i].begin();
252 entryIt != entryList[i].end();) {
253 if (!(*entryIt)->global) {
254 freeList[i].push_back(*entryIt);
255 entryList[i].erase(entryIt++);
256 } else {
257 ++entryIt;
258 }
259 }
260 }
261 }
262
263 void
264 GpuTLB::demapPage(Addr va, uint64_t asn)
265 {
266
267 int set = (va >> TheISA::PageShift) & setMask;
268 auto entry = lookupIt(va, false);
269
270 if (entry != entryList[set].end()) {
271 freeList[set].push_back(*entry);
272 entryList[set].erase(entry);
273 }
274 }
275
276 Fault
277 GpuTLB::translateInt(const RequestPtr &req, ThreadContext *tc)
278 {
279         DPRINTF(GPUTLB, "Address references internal memory.\n");
280 Addr vaddr = req->getVaddr();
281 Addr prefix = (vaddr >> 3) & IntAddrPrefixMask;
282
283 if (prefix == IntAddrPrefixCPUID) {
284 panic("CPUID memory space not yet implemented!\n");
285 } else if (prefix == IntAddrPrefixMSR) {
286 vaddr = vaddr >> 3;
287 req->setFlags(Request::MMAPPED_IPR);
288 Addr regNum = 0;
289
290 switch (vaddr & ~IntAddrPrefixMask) {
291 case 0x10:
292 regNum = MISCREG_TSC;
293 break;
294 case 0x1B:
295 regNum = MISCREG_APIC_BASE;
296 break;
297 case 0xFE:
298 regNum = MISCREG_MTRRCAP;
299 break;
300 case 0x174:
301 regNum = MISCREG_SYSENTER_CS;
302 break;
303 case 0x175:
304 regNum = MISCREG_SYSENTER_ESP;
305 break;
306 case 0x176:
307 regNum = MISCREG_SYSENTER_EIP;
308 break;
309 case 0x179:
310 regNum = MISCREG_MCG_CAP;
311 break;
312 case 0x17A:
313 regNum = MISCREG_MCG_STATUS;
314 break;
315 case 0x17B:
316 regNum = MISCREG_MCG_CTL;
317 break;
318 case 0x1D9:
319 regNum = MISCREG_DEBUG_CTL_MSR;
320 break;
321 case 0x1DB:
322 regNum = MISCREG_LAST_BRANCH_FROM_IP;
323 break;
324 case 0x1DC:
325 regNum = MISCREG_LAST_BRANCH_TO_IP;
326 break;
327 case 0x1DD:
328 regNum = MISCREG_LAST_EXCEPTION_FROM_IP;
329 break;
330 case 0x1DE:
331 regNum = MISCREG_LAST_EXCEPTION_TO_IP;
332 break;
333 case 0x200:
334 regNum = MISCREG_MTRR_PHYS_BASE_0;
335 break;
336 case 0x201:
337 regNum = MISCREG_MTRR_PHYS_MASK_0;
338 break;
339 case 0x202:
340 regNum = MISCREG_MTRR_PHYS_BASE_1;
341 break;
342 case 0x203:
343 regNum = MISCREG_MTRR_PHYS_MASK_1;
344 break;
345 case 0x204:
346 regNum = MISCREG_MTRR_PHYS_BASE_2;
347 break;
348 case 0x205:
349 regNum = MISCREG_MTRR_PHYS_MASK_2;
350 break;
351 case 0x206:
352 regNum = MISCREG_MTRR_PHYS_BASE_3;
353 break;
354 case 0x207:
355 regNum = MISCREG_MTRR_PHYS_MASK_3;
356 break;
357 case 0x208:
358 regNum = MISCREG_MTRR_PHYS_BASE_4;
359 break;
360 case 0x209:
361 regNum = MISCREG_MTRR_PHYS_MASK_4;
362 break;
363 case 0x20A:
364 regNum = MISCREG_MTRR_PHYS_BASE_5;
365 break;
366 case 0x20B:
367 regNum = MISCREG_MTRR_PHYS_MASK_5;
368 break;
369 case 0x20C:
370 regNum = MISCREG_MTRR_PHYS_BASE_6;
371 break;
372 case 0x20D:
373 regNum = MISCREG_MTRR_PHYS_MASK_6;
374 break;
375 case 0x20E:
376 regNum = MISCREG_MTRR_PHYS_BASE_7;
377 break;
378 case 0x20F:
379 regNum = MISCREG_MTRR_PHYS_MASK_7;
380 break;
381 case 0x250:
382 regNum = MISCREG_MTRR_FIX_64K_00000;
383 break;
384 case 0x258:
385 regNum = MISCREG_MTRR_FIX_16K_80000;
386 break;
387 case 0x259:
388 regNum = MISCREG_MTRR_FIX_16K_A0000;
389 break;
390 case 0x268:
391 regNum = MISCREG_MTRR_FIX_4K_C0000;
392 break;
393 case 0x269:
394 regNum = MISCREG_MTRR_FIX_4K_C8000;
395 break;
396 case 0x26A:
397 regNum = MISCREG_MTRR_FIX_4K_D0000;
398 break;
399 case 0x26B:
400 regNum = MISCREG_MTRR_FIX_4K_D8000;
401 break;
402 case 0x26C:
403 regNum = MISCREG_MTRR_FIX_4K_E0000;
404 break;
405 case 0x26D:
406 regNum = MISCREG_MTRR_FIX_4K_E8000;
407 break;
408 case 0x26E:
409 regNum = MISCREG_MTRR_FIX_4K_F0000;
410 break;
411 case 0x26F:
412 regNum = MISCREG_MTRR_FIX_4K_F8000;
413 break;
414 case 0x277:
415 regNum = MISCREG_PAT;
416 break;
417 case 0x2FF:
418 regNum = MISCREG_DEF_TYPE;
419 break;
420 case 0x400:
421 regNum = MISCREG_MC0_CTL;
422 break;
423 case 0x404:
424 regNum = MISCREG_MC1_CTL;
425 break;
426 case 0x408:
427 regNum = MISCREG_MC2_CTL;
428 break;
429 case 0x40C:
430 regNum = MISCREG_MC3_CTL;
431 break;
432 case 0x410:
433 regNum = MISCREG_MC4_CTL;
434 break;
435 case 0x414:
436 regNum = MISCREG_MC5_CTL;
437 break;
438 case 0x418:
439 regNum = MISCREG_MC6_CTL;
440 break;
441 case 0x41C:
442 regNum = MISCREG_MC7_CTL;
443 break;
444 case 0x401:
445 regNum = MISCREG_MC0_STATUS;
446 break;
447 case 0x405:
448 regNum = MISCREG_MC1_STATUS;
449 break;
450 case 0x409:
451 regNum = MISCREG_MC2_STATUS;
452 break;
453 case 0x40D:
454 regNum = MISCREG_MC3_STATUS;
455 break;
456 case 0x411:
457 regNum = MISCREG_MC4_STATUS;
458 break;
459 case 0x415:
460 regNum = MISCREG_MC5_STATUS;
461 break;
462 case 0x419:
463 regNum = MISCREG_MC6_STATUS;
464 break;
465 case 0x41D:
466 regNum = MISCREG_MC7_STATUS;
467 break;
468 case 0x402:
469 regNum = MISCREG_MC0_ADDR;
470 break;
471 case 0x406:
472 regNum = MISCREG_MC1_ADDR;
473 break;
474 case 0x40A:
475 regNum = MISCREG_MC2_ADDR;
476 break;
477 case 0x40E:
478 regNum = MISCREG_MC3_ADDR;
479 break;
480 case 0x412:
481 regNum = MISCREG_MC4_ADDR;
482 break;
483 case 0x416:
484 regNum = MISCREG_MC5_ADDR;
485 break;
486 case 0x41A:
487 regNum = MISCREG_MC6_ADDR;
488 break;
489 case 0x41E:
490 regNum = MISCREG_MC7_ADDR;
491 break;
492 case 0x403:
493 regNum = MISCREG_MC0_MISC;
494 break;
495 case 0x407:
496 regNum = MISCREG_MC1_MISC;
497 break;
498 case 0x40B:
499 regNum = MISCREG_MC2_MISC;
500 break;
501 case 0x40F:
502 regNum = MISCREG_MC3_MISC;
503 break;
504 case 0x413:
505 regNum = MISCREG_MC4_MISC;
506 break;
507 case 0x417:
508 regNum = MISCREG_MC5_MISC;
509 break;
510 case 0x41B:
511 regNum = MISCREG_MC6_MISC;
512 break;
513 case 0x41F:
514 regNum = MISCREG_MC7_MISC;
515 break;
516 case 0xC0000080:
517 regNum = MISCREG_EFER;
518 break;
519 case 0xC0000081:
520 regNum = MISCREG_STAR;
521 break;
522 case 0xC0000082:
523 regNum = MISCREG_LSTAR;
524 break;
525 case 0xC0000083:
526 regNum = MISCREG_CSTAR;
527 break;
528 case 0xC0000084:
529 regNum = MISCREG_SF_MASK;
530 break;
531 case 0xC0000100:
532 regNum = MISCREG_FS_BASE;
533 break;
534 case 0xC0000101:
535 regNum = MISCREG_GS_BASE;
536 break;
537 case 0xC0000102:
538 regNum = MISCREG_KERNEL_GS_BASE;
539 break;
540 case 0xC0000103:
541 regNum = MISCREG_TSC_AUX;
542 break;
543 case 0xC0010000:
544 regNum = MISCREG_PERF_EVT_SEL0;
545 break;
546 case 0xC0010001:
547 regNum = MISCREG_PERF_EVT_SEL1;
548 break;
549 case 0xC0010002:
550 regNum = MISCREG_PERF_EVT_SEL2;
551 break;
552 case 0xC0010003:
553 regNum = MISCREG_PERF_EVT_SEL3;
554 break;
555 case 0xC0010004:
556 regNum = MISCREG_PERF_EVT_CTR0;
557 break;
558 case 0xC0010005:
559 regNum = MISCREG_PERF_EVT_CTR1;
560 break;
561 case 0xC0010006:
562 regNum = MISCREG_PERF_EVT_CTR2;
563 break;
564 case 0xC0010007:
565 regNum = MISCREG_PERF_EVT_CTR3;
566 break;
567 case 0xC0010010:
568 regNum = MISCREG_SYSCFG;
569 break;
570 case 0xC0010016:
571 regNum = MISCREG_IORR_BASE0;
572 break;
573 case 0xC0010017:
574 regNum = MISCREG_IORR_BASE1;
575 break;
576 case 0xC0010018:
577 regNum = MISCREG_IORR_MASK0;
578 break;
579 case 0xC0010019:
580 regNum = MISCREG_IORR_MASK1;
581 break;
582 case 0xC001001A:
583 regNum = MISCREG_TOP_MEM;
584 break;
585 case 0xC001001D:
586 regNum = MISCREG_TOP_MEM2;
587 break;
588 case 0xC0010114:
589 regNum = MISCREG_VM_CR;
590 break;
591 case 0xC0010115:
592 regNum = MISCREG_IGNNE;
593 break;
594 case 0xC0010116:
595 regNum = MISCREG_SMM_CTL;
596 break;
597 case 0xC0010117:
598 regNum = MISCREG_VM_HSAVE_PA;
599 break;
600 default:
601 return std::make_shared<GeneralProtection>(0);
602 }
603 //The index is multiplied by the size of a MiscReg so that
604 //any memory dependence calculations will not see these as
605 //overlapping.
606 req->setPaddr(regNum * sizeof(RegVal));
607 return NoFault;
608 } else if (prefix == IntAddrPrefixIO) {
609 // TODO If CPL > IOPL or in virtual mode, check the I/O permission
610 // bitmap in the TSS.
611
612 Addr IOPort = vaddr & ~IntAddrPrefixMask;
613 // Make sure the address fits in the expected 16 bit IO address
614 // space.
615 assert(!(IOPort & ~0xFFFF));
616
617 if (IOPort == 0xCF8 && req->getSize() == 4) {
618 req->setFlags(Request::MMAPPED_IPR);
619 req->setPaddr(MISCREG_PCI_CONFIG_ADDRESS * sizeof(RegVal));
620 } else if ((IOPort & ~mask(2)) == 0xCFC) {
621 req->setFlags(Request::UNCACHEABLE);
622
623 Addr configAddress =
624 tc->readMiscRegNoEffect(MISCREG_PCI_CONFIG_ADDRESS);
625
626 if (bits(configAddress, 31, 31)) {
627 req->setPaddr(PhysAddrPrefixPciConfig |
628 mbits(configAddress, 30, 2) |
629 (IOPort & mask(2)));
630 } else {
631 req->setPaddr(PhysAddrPrefixIO | IOPort);
632 }
633 } else {
634 req->setFlags(Request::UNCACHEABLE);
635 req->setPaddr(PhysAddrPrefixIO | IOPort);
636 }
637 return NoFault;
638 } else {
639 panic("Access to unrecognized internal address space %#x.\n",
640 prefix);
641 }
642 }
643
644 /**
645      * tlbLookup will only perform a TLB lookup, returning true on a TLB hit
646 * and false on a TLB miss.
647 * Many of the checks about different modes have been converted to
648 * assertions, since these parts of the code are not really used.
649 * On a hit it will update the LRU stack.
650 */
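// tlbLookup is used both by the timing path (issueTLBLookup) and by the
// functional path (CpuSidePort::recvFunctional); when update_stats is false
// (e.g., functional accesses used for memory seeding) only the statistics
// updates are skipped.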
651 bool
652 GpuTLB::tlbLookup(const RequestPtr &req,
653 ThreadContext *tc, bool update_stats)
654 {
655 bool tlb_hit = false;
656 #ifndef NDEBUG
657 uint32_t flags = req->getFlags();
658 int seg = flags & SegmentFlagMask;
659 #endif
660
661 assert(seg != SEGMENT_REG_MS);
662 Addr vaddr = req->getVaddr();
663 DPRINTF(GPUTLB, "TLB Lookup for vaddr %#x.\n", vaddr);
664 HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
665
666 if (m5Reg.prot) {
667 DPRINTF(GPUTLB, "In protected mode.\n");
668 // make sure we are in 64-bit mode
669 assert(m5Reg.mode == LongMode);
670
671 // If paging is enabled, do the translation.
672 if (m5Reg.paging) {
673 DPRINTF(GPUTLB, "Paging enabled.\n");
674 //update LRU stack on a hit
675 TlbEntry *entry = lookup(vaddr, true);
676
677 if (entry)
678 tlb_hit = true;
679
680 if (!update_stats) {
681 // functional tlb access for memory initialization
682 // i.e., memory seeding or instr. seeding -> don't update
683 // TLB and stats
684 return tlb_hit;
685 }
686
687 localNumTLBAccesses++;
688
689 if (!entry) {
690 localNumTLBMisses++;
691 } else {
692 localNumTLBHits++;
693 }
694 }
695 }
696
697 return tlb_hit;
698 }
699
700 Fault
701 GpuTLB::translate(const RequestPtr &req, ThreadContext *tc,
702 Translation *translation, Mode mode,
703 bool &delayedResponse, bool timing, int &latency)
704 {
705 uint32_t flags = req->getFlags();
706 int seg = flags & SegmentFlagMask;
707 bool storeCheck = flags & (StoreCheck << FlagShift);
708
709 // If this is true, we're dealing with a request
710 // to a non-memory address space.
711 if (seg == SEGMENT_REG_MS) {
712 return translateInt(req, tc);
713 }
714
715 delayedResponse = false;
716 Addr vaddr = req->getVaddr();
717 DPRINTF(GPUTLB, "Translating vaddr %#x.\n", vaddr);
718
719 HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
720
721 // If protected mode has been enabled...
722 if (m5Reg.prot) {
723 DPRINTF(GPUTLB, "In protected mode.\n");
724 // If we're not in 64-bit mode, do protection/limit checks
725 if (m5Reg.mode != LongMode) {
726 DPRINTF(GPUTLB, "Not in long mode. Checking segment "
727 "protection.\n");
728
729 // Check for a null segment selector.
730 if (!(seg == SEGMENT_REG_TSG || seg == SYS_SEGMENT_REG_IDTR ||
731 seg == SEGMENT_REG_HS || seg == SEGMENT_REG_LS)
732 && !tc->readMiscRegNoEffect(MISCREG_SEG_SEL(seg))) {
733 return std::make_shared<GeneralProtection>(0);
734 }
735
736 bool expandDown = false;
737 SegAttr attr = tc->readMiscRegNoEffect(MISCREG_SEG_ATTR(seg));
738
739 if (seg >= SEGMENT_REG_ES && seg <= SEGMENT_REG_HS) {
740 if (!attr.writable && (mode == BaseTLB::Write ||
741 storeCheck))
742 return std::make_shared<GeneralProtection>(0);
743
744 if (!attr.readable && mode == BaseTLB::Read)
745 return std::make_shared<GeneralProtection>(0);
746
747 expandDown = attr.expandDown;
748
749 }
750
751 Addr base = tc->readMiscRegNoEffect(MISCREG_SEG_BASE(seg));
752 Addr limit = tc->readMiscRegNoEffect(MISCREG_SEG_LIMIT(seg));
753                 // This assumes we're not in 64-bit mode. If we were, the
754                 // default address size would be 64 bits, overridable to 32.
755 int size = 32;
756 bool sizeOverride = (flags & (AddrSizeFlagBit << FlagShift));
757 SegAttr csAttr = tc->readMiscRegNoEffect(MISCREG_CS_ATTR);
758
759 if ((csAttr.defaultSize && sizeOverride) ||
760 (!csAttr.defaultSize && !sizeOverride)) {
761 size = 16;
762 }
763
764 Addr offset = bits(vaddr - base, size - 1, 0);
765 Addr endOffset = offset + req->getSize() - 1;
766
767 if (expandDown) {
768 DPRINTF(GPUTLB, "Checking an expand down segment.\n");
769 warn_once("Expand down segments are untested.\n");
770
771 if (offset <= limit || endOffset <= limit)
772 return std::make_shared<GeneralProtection>(0);
773 } else {
774 if (offset > limit || endOffset > limit)
775 return std::make_shared<GeneralProtection>(0);
776 }
777 }
778
779 // If paging is enabled, do the translation.
780 if (m5Reg.paging) {
781 DPRINTF(GPUTLB, "Paging enabled.\n");
782 // The vaddr already has the segment base applied.
783 TlbEntry *entry = lookup(vaddr);
784 localNumTLBAccesses++;
785
786 if (!entry) {
787 localNumTLBMisses++;
788 if (timing) {
789 latency = missLatency1;
790 }
791
792 if (FullSystem) {
793 fatal("GpuTLB doesn't support full-system mode\n");
794 } else {
795 DPRINTF(GPUTLB, "Handling a TLB miss for address %#x "
796 "at pc %#x.\n", vaddr, tc->instAddr());
797
798 Process *p = tc->getProcessPtr();
799 const EmulationPageTable::Entry *pte =
800 p->pTable->lookup(vaddr);
801
802 if (!pte && mode != BaseTLB::Execute) {
803 // penalize a "page fault" more
804 if (timing)
805 latency += missLatency2;
806
807 if (p->fixupStackFault(vaddr))
808 pte = p->pTable->lookup(vaddr);
809 }
810
811 if (!pte) {
812 return std::make_shared<PageFault>(vaddr, true,
813 mode, true,
814 false);
815 } else {
816 Addr alignedVaddr = p->pTable->pageAlign(vaddr);
817
818 DPRINTF(GPUTLB, "Mapping %#x to %#x\n",
819 alignedVaddr, pte->paddr);
820
821 TlbEntry gpuEntry(p->pid(), alignedVaddr,
822 pte->paddr, false, false);
823 entry = insert(alignedVaddr, gpuEntry);
824 }
825
826 DPRINTF(GPUTLB, "Miss was serviced.\n");
827 }
828 } else {
829 localNumTLBHits++;
830
831 if (timing) {
832 latency = hitLatency;
833 }
834 }
835
836 // Do paging protection checks.
837 bool inUser = (m5Reg.cpl == 3 &&
838 !(flags & (CPL0FlagBit << FlagShift)));
839
840 CR0 cr0 = tc->readMiscRegNoEffect(MISCREG_CR0);
841 bool badWrite = (!entry->writable && (inUser || cr0.wp));
842
843 if ((inUser && !entry->user) || (mode == BaseTLB::Write &&
844 badWrite)) {
845 // The page must have been present to get into the TLB in
846 // the first place. We'll assume the reserved bits are
847 // fine even though we're not checking them.
848 return std::make_shared<PageFault>(vaddr, true, mode,
849 inUser, false);
850 }
851
852 if (storeCheck && badWrite) {
853 // This would fault if this were a write, so return a page
854 // fault that reflects that happening.
855 return std::make_shared<PageFault>(vaddr, true,
856 BaseTLB::Write,
857 inUser, false);
858 }
859
860
861 DPRINTF(GPUTLB, "Entry found with paddr %#x, doing protection "
862 "checks.\n", entry->paddr);
863
864 int page_size = entry->size();
865 Addr paddr = entry->paddr | (vaddr & (page_size - 1));
866 DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);
867 req->setPaddr(paddr);
868
869 if (entry->uncacheable)
870 req->setFlags(Request::UNCACHEABLE);
871 } else {
872 //Use the address which already has segmentation applied.
873 DPRINTF(GPUTLB, "Paging disabled.\n");
874 DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, vaddr);
875 req->setPaddr(vaddr);
876 }
877 } else {
878 // Real mode
879 DPRINTF(GPUTLB, "In real mode.\n");
880 DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, vaddr);
881 req->setPaddr(vaddr);
882 }
883
884 // Check for an access to the local APIC
885 if (FullSystem) {
886 LocalApicBase localApicBase =
887 tc->readMiscRegNoEffect(MISCREG_APIC_BASE);
888
889 Addr baseAddr = localApicBase.base * PageBytes;
890 Addr paddr = req->getPaddr();
891
892 if (baseAddr <= paddr && baseAddr + PageBytes > paddr) {
893 // Force the access to be uncacheable.
894 req->setFlags(Request::UNCACHEABLE);
895 req->setPaddr(x86LocalAPICAddress(tc->contextId(),
896 paddr - baseAddr));
897 }
898 }
899
900 return NoFault;
901     }
902
903 Fault
904 GpuTLB::translateAtomic(const RequestPtr &req, ThreadContext *tc,
905 Mode mode, int &latency)
906 {
907 bool delayedResponse;
908
909 return GpuTLB::translate(req, tc, nullptr, mode, delayedResponse, false,
910 latency);
911 }
912
913 void
914 GpuTLB::translateTiming(const RequestPtr &req, ThreadContext *tc,
915 Translation *translation, Mode mode, int &latency)
916 {
917 bool delayedResponse;
918 assert(translation);
919
920 Fault fault = GpuTLB::translate(req, tc, translation, mode,
921 delayedResponse, true, latency);
922
923 if (!delayedResponse)
924 translation->finish(fault, req, tc, mode);
925 }
926
927 Walker*
928 GpuTLB::getWalker()
929 {
930 return walker;
931 }
932
933
934 void
935 GpuTLB::serialize(CheckpointOut &cp) const
936 {
937 }
938
939 void
940 GpuTLB::unserialize(CheckpointIn &cp)
941 {
942 }
943
944 void
945 GpuTLB::regStats()
946 {
947 ClockedObject::regStats();
948
949 localNumTLBAccesses
950 .name(name() + ".local_TLB_accesses")
951 .desc("Number of TLB accesses")
952 ;
953
954 localNumTLBHits
955 .name(name() + ".local_TLB_hits")
956 .desc("Number of TLB hits")
957 ;
958
959 localNumTLBMisses
960 .name(name() + ".local_TLB_misses")
961 .desc("Number of TLB misses")
962 ;
963
964 localTLBMissRate
965 .name(name() + ".local_TLB_miss_rate")
966 .desc("TLB miss rate")
967 ;
968
969 accessCycles
970 .name(name() + ".access_cycles")
971 .desc("Cycles spent accessing this TLB level")
972 ;
973
974 pageTableCycles
975 .name(name() + ".page_table_cycles")
976 .desc("Cycles spent accessing the page table")
977 ;
978
979 localTLBMissRate = 100 * localNumTLBMisses / localNumTLBAccesses;
980
981 numUniquePages
982 .name(name() + ".unique_pages")
983 .desc("Number of unique pages touched")
984 ;
985
986 localCycles
987 .name(name() + ".local_cycles")
988 .desc("Number of cycles spent in queue for all incoming reqs")
989 ;
990
991 localLatency
992 .name(name() + ".local_latency")
993 .desc("Avg. latency over incoming coalesced reqs")
994 ;
995
996 localLatency = localCycles / localNumTLBAccesses;
997
998 globalNumTLBAccesses
999 .name(name() + ".global_TLB_accesses")
1000 .desc("Number of TLB accesses")
1001 ;
1002
1003 globalNumTLBHits
1004 .name(name() + ".global_TLB_hits")
1005 .desc("Number of TLB hits")
1006 ;
1007
1008 globalNumTLBMisses
1009 .name(name() + ".global_TLB_misses")
1010 .desc("Number of TLB misses")
1011 ;
1012
1013 globalTLBMissRate
1014 .name(name() + ".global_TLB_miss_rate")
1015 .desc("TLB miss rate")
1016 ;
1017
1018 globalTLBMissRate = 100 * globalNumTLBMisses / globalNumTLBAccesses;
1019
1020 avgReuseDistance
1021 .name(name() + ".avg_reuse_distance")
1022 .desc("avg. reuse distance over all pages (in ticks)")
1023 ;
1024
1025 }
1026
1027 /**
1028 * Do the TLB lookup for this coalesced request and schedule
1029 * another event <TLB access latency> cycles later.
1030 */
1031
1032 void
1033 GpuTLB::issueTLBLookup(PacketPtr pkt)
1034 {
1035 assert(pkt);
1036 assert(pkt->senderState);
1037
1038 Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
1039 TheISA::PageBytes);
1040
1041 TranslationState *sender_state =
1042 safe_cast<TranslationState*>(pkt->senderState);
1043
1044 bool update_stats = !sender_state->prefetch;
1045 ThreadContext * tmp_tc = sender_state->tc;
1046
1047 DPRINTF(GPUTLB, "Translation req. for virt. page addr %#x\n",
1048 virt_page_addr);
1049
1050 int req_cnt = sender_state->reqCnt.back();
1051
1052 if (update_stats) {
1053 accessCycles -= (curTick() * req_cnt);
1054 localCycles -= curTick();
1055 updatePageFootprint(virt_page_addr);
1056 globalNumTLBAccesses += req_cnt;
1057 }
1058
1059 tlbOutcome lookup_outcome = TLB_MISS;
1060 const RequestPtr &tmp_req = pkt->req;
1061
1062 // Access the TLB and figure out if it's a hit or a miss.
1063 bool success = tlbLookup(tmp_req, tmp_tc, update_stats);
1064
1065 if (success) {
1066 lookup_outcome = TLB_HIT;
1067 // Put the entry in SenderState
1068 TlbEntry *entry = lookup(tmp_req->getVaddr(), false);
1069 assert(entry);
1070
1071 auto p = sender_state->tc->getProcessPtr();
1072 sender_state->tlbEntry =
1073 new TlbEntry(p->pid(), entry->vaddr, entry->paddr,
1074 false, false);
1075
1076 if (update_stats) {
1077 // the reqCnt has an entry per level, so its size tells us
1078 // which level we are in
1079 sender_state->hitLevel = sender_state->reqCnt.size();
1080 globalNumTLBHits += req_cnt;
1081 }
1082 } else {
1083 if (update_stats)
1084 globalNumTLBMisses += req_cnt;
1085 }
1086
1087 /*
1088 * We now know the TLB lookup outcome (if it's a hit or a miss), as well
1089 * as the TLB access latency.
1090 *
1091 * We create and schedule a new TLBEvent which will help us take the
1092 * appropriate actions (e.g., update TLB on a hit, send request to lower
1093 * level TLB on a miss, or start a page walk if this was the last-level
1094 * TLB)
1095 */
1096 TLBEvent *tlb_event =
1097 new TLBEvent(this, virt_page_addr, lookup_outcome, pkt);
1098
1099 if (translationReturnEvent.count(virt_page_addr)) {
1100 panic("Virtual Page Address %#x already has a return event\n",
1101 virt_page_addr);
1102 }
1103
1104 translationReturnEvent[virt_page_addr] = tlb_event;
1105 assert(tlb_event);
1106
1107 DPRINTF(GPUTLB, "schedule translationReturnEvent @ curTick %d\n",
1108 curTick() + this->ticks(hitLatency));
1109
1110 schedule(tlb_event, curTick() + this->ticks(hitLatency));
1111 }
1112
1113 GpuTLB::TLBEvent::TLBEvent(GpuTLB* _tlb, Addr _addr, tlbOutcome tlb_outcome,
1114 PacketPtr _pkt)
1115 : Event(CPU_Tick_Pri), tlb(_tlb), virtPageAddr(_addr),
1116 outcome(tlb_outcome), pkt(_pkt)
1117 {
1118 }
1119
1120 /**
1121  * Do the paging protection checks. If we encounter a page fault here,
1122  * we panic, since page faults are not expected at this point.
1123 */
1124 void
1125 GpuTLB::pagingProtectionChecks(ThreadContext *tc, PacketPtr pkt,
1126 TlbEntry * tlb_entry, Mode mode)
1127 {
1128 HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
1129 uint32_t flags = pkt->req->getFlags();
1130 bool storeCheck = flags & (StoreCheck << FlagShift);
1131
1132 // Do paging protection checks.
1133 bool inUser = (m5Reg.cpl == 3 && !(flags & (CPL0FlagBit << FlagShift)));
1134 CR0 cr0 = tc->readMiscRegNoEffect(MISCREG_CR0);
1135
1136 bool badWrite = (!tlb_entry->writable && (inUser || cr0.wp));
1137
1138 if ((inUser && !tlb_entry->user) ||
1139 (mode == BaseTLB::Write && badWrite)) {
1140 // The page must have been present to get into the TLB in
1141 // the first place. We'll assume the reserved bits are
1142 // fine even though we're not checking them.
1143 panic("Page fault detected");
1144 }
1145
1146 if (storeCheck && badWrite) {
1147 // This would fault if this were a write, so return a page
1148 // fault that reflects that happening.
1149 panic("Page fault detected");
1150 }
1151 }
1152
1153 /**
1154 * handleTranslationReturn is called on a TLB hit,
1155  * when a TLB miss returns, or when a page walk returns.
1156  * In the latter two cases it is called with TLB_MISS as the tlbOutcome.
1157 */
1158 void
1159 GpuTLB::handleTranslationReturn(Addr virt_page_addr, tlbOutcome tlb_outcome,
1160 PacketPtr pkt)
1161 {
1162
1163 assert(pkt);
1164 Addr vaddr = pkt->req->getVaddr();
1165
1166 TranslationState *sender_state =
1167 safe_cast<TranslationState*>(pkt->senderState);
1168
1169 ThreadContext *tc = sender_state->tc;
1170 Mode mode = sender_state->tlbMode;
1171
1172 TlbEntry *local_entry, *new_entry;
1173
1174 if (tlb_outcome == TLB_HIT) {
1175 DPRINTF(GPUTLB, "Translation Done - TLB Hit for addr %#x\n", vaddr);
1176 local_entry = sender_state->tlbEntry;
1177 } else {
1178 DPRINTF(GPUTLB, "Translation Done - TLB Miss for addr %#x\n",
1179 vaddr);
1180
1181 // We are returning either from a page walk or from a hit at a lower
1182 // TLB level. The senderState should be "carrying" a pointer to the
1183 // correct TLBEntry.
1184 new_entry = sender_state->tlbEntry;
1185 assert(new_entry);
1186 local_entry = new_entry;
1187
1188 if (allocationPolicy) {
1189 DPRINTF(GPUTLB, "allocating entry w/ addr %#x\n",
1190 virt_page_addr);
1191
1192 local_entry = insert(virt_page_addr, *new_entry);
1193 }
1194
1195 assert(local_entry);
1196 }
1197
1198 /**
1199 * At this point the packet carries an up-to-date tlbEntry pointer
1200 * in its senderState.
1201 * Next step is to do the paging protection checks.
1202 */
1203 DPRINTF(GPUTLB, "Entry found with vaddr %#x, doing protection checks "
1204 "while paddr was %#x.\n", local_entry->vaddr,
1205 local_entry->paddr);
1206
1207 pagingProtectionChecks(tc, pkt, local_entry, mode);
1208 int page_size = local_entry->size();
1209 Addr paddr = local_entry->paddr | (vaddr & (page_size - 1));
1210 DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);
1211
1212 // Since this packet will be sent through the cpu side slave port,
1213 // it must be converted to a response pkt if it is not one already
1214 if (pkt->isRequest()) {
1215 pkt->makeTimingResponse();
1216 }
1217
1218 pkt->req->setPaddr(paddr);
1219
1220 if (local_entry->uncacheable) {
1221 pkt->req->setFlags(Request::UNCACHEABLE);
1222 }
1223
1224 //send packet back to coalescer
1225 cpuSidePort[0]->sendTimingResp(pkt);
1226 //schedule cleanup event
1227 cleanupQueue.push(virt_page_addr);
1228
1229 // schedule this only once per cycle.
1230 // The check is required because we might have multiple translations
1231 // returning the same cycle
1232 // this is a maximum priority event and must be on the same cycle
1233 // as the cleanup event in TLBCoalescer to avoid a race with
1234 // IssueProbeEvent caused by TLBCoalescer::MemSidePort::recvReqRetry
1235 if (!cleanupEvent.scheduled())
1236 schedule(cleanupEvent, curTick());
1237 }
1238
1239 /**
1240 * Here we take the appropriate actions based on the result of the
1241 * TLB lookup.
1242 */
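// Outcomes handled below: TLB_HIT responds to the coalescer right away;
// TLB_MISS either forwards the packet to a lower-level TLB or schedules a
// PAGE_WALK; PAGE_WALK consults the process page table and then responds as
// a serviced miss; MISS_RETURN handles a reply coming back from a lower
// level and likewise responds as a serviced miss.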
1243 void
1244 GpuTLB::translationReturn(Addr virtPageAddr, tlbOutcome outcome,
1245 PacketPtr pkt)
1246 {
1247 DPRINTF(GPUTLB, "Triggered TLBEvent for addr %#x\n", virtPageAddr);
1248
1249 assert(translationReturnEvent[virtPageAddr]);
1250 assert(pkt);
1251
1252 TranslationState *tmp_sender_state =
1253 safe_cast<TranslationState*>(pkt->senderState);
1254
1255 int req_cnt = tmp_sender_state->reqCnt.back();
1256 bool update_stats = !tmp_sender_state->prefetch;
1257
1258
1259 if (outcome == TLB_HIT) {
1260 handleTranslationReturn(virtPageAddr, TLB_HIT, pkt);
1261
1262 if (update_stats) {
1263 accessCycles += (req_cnt * curTick());
1264 localCycles += curTick();
1265 }
1266
1267 } else if (outcome == TLB_MISS) {
1268
1269 DPRINTF(GPUTLB, "This is a TLB miss\n");
1270 if (update_stats) {
1271 accessCycles += (req_cnt*curTick());
1272 localCycles += curTick();
1273 }
1274
1275 if (hasMemSidePort) {
1276             // the one cycle added here represents the delay from when we get
1277             // the reply back until we propagate it to the coalescer
1278 // above.
1279 if (update_stats) {
1280 accessCycles += (req_cnt * 1);
1281 localCycles += 1;
1282 }
1283
1284 /**
1285 * There is a TLB below. Send the coalesced request.
1286 * We actually send the very first packet of all the
1287 * pending packets for this virtual page address.
1288 */
1289 if (!memSidePort[0]->sendTimingReq(pkt)) {
1290 DPRINTF(GPUTLB, "Failed sending translation request to "
1291 "lower level TLB for addr %#x\n", virtPageAddr);
1292
1293 memSidePort[0]->retries.push_back(pkt);
1294 } else {
1295 DPRINTF(GPUTLB, "Sent translation request to lower level "
1296 "TLB for addr %#x\n", virtPageAddr);
1297 }
1298 } else {
1299 //this is the last level TLB. Start a page walk
1300 DPRINTF(GPUTLB, "Last level TLB - start a page walk for "
1301 "addr %#x\n", virtPageAddr);
1302
1303 if (update_stats)
1304 pageTableCycles -= (req_cnt*curTick());
1305
1306 TLBEvent *tlb_event = translationReturnEvent[virtPageAddr];
1307 assert(tlb_event);
1308 tlb_event->updateOutcome(PAGE_WALK);
1309 schedule(tlb_event, curTick() + ticks(missLatency2));
1310 }
1311 } else if (outcome == PAGE_WALK) {
1312 if (update_stats)
1313 pageTableCycles += (req_cnt*curTick());
1314
1315 // Need to access the page table and update the TLB
1316 DPRINTF(GPUTLB, "Doing a page walk for address %#x\n",
1317 virtPageAddr);
1318
1319 TranslationState *sender_state =
1320 safe_cast<TranslationState*>(pkt->senderState);
1321
1322 Process *p = sender_state->tc->getProcessPtr();
1323 Addr vaddr = pkt->req->getVaddr();
1324 #ifndef NDEBUG
1325 Addr alignedVaddr = p->pTable->pageAlign(vaddr);
1326 assert(alignedVaddr == virtPageAddr);
1327 #endif
1328 const EmulationPageTable::Entry *pte = p->pTable->lookup(vaddr);
1329 if (!pte && sender_state->tlbMode != BaseTLB::Execute &&
1330 p->fixupStackFault(vaddr)) {
1331 pte = p->pTable->lookup(vaddr);
1332 }
1333
1334 if (pte) {
1335 DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
1336 pte->paddr);
1337
1338 sender_state->tlbEntry =
1339 new TlbEntry(p->pid(), virtPageAddr, pte->paddr, false,
1340 false);
1341 } else {
1342 sender_state->tlbEntry = nullptr;
1343 }
1344
1345 handleTranslationReturn(virtPageAddr, TLB_MISS, pkt);
1346 } else if (outcome == MISS_RETURN) {
1347 /** we add an extra cycle in the return path of the translation
1348 * requests in between the various TLB levels.
1349 */
1350 handleTranslationReturn(virtPageAddr, TLB_MISS, pkt);
1351 } else {
1352 panic("Unexpected TLB outcome %d", outcome);
1353 }
1354 }
1355
1356 void
1357 GpuTLB::TLBEvent::process()
1358 {
1359 tlb->translationReturn(virtPageAddr, outcome, pkt);
1360 }
1361
1362 const char*
1363 GpuTLB::TLBEvent::description() const
1364 {
1365 return "trigger translationDoneEvent";
1366 }
1367
1368 void
1369 GpuTLB::TLBEvent::updateOutcome(tlbOutcome _outcome)
1370 {
1371 outcome = _outcome;
1372 }
1373
1374 Addr
1375 GpuTLB::TLBEvent::getTLBEventVaddr()
1376 {
1377 return virtPageAddr;
1378 }
1379
1380 /*
1381  * recvTimingReq receives a coalesced timing request from a TLBCoalescer
1382  * and calls issueTLBLookup().
1383  * It only rejects the packet if we have exceeded the maximum
1384  * number of outstanding requests for this TLB.
1385 */
1386 bool
1387 GpuTLB::CpuSidePort::recvTimingReq(PacketPtr pkt)
1388 {
1389 if (tlb->outstandingReqs < tlb->maxCoalescedReqs) {
1390 tlb->issueTLBLookup(pkt);
1391 // update number of outstanding translation requests
1392 tlb->outstandingReqs++;
1393 return true;
1394 } else {
1395 DPRINTF(GPUTLB, "Reached maxCoalescedReqs number %d\n",
1396 tlb->outstandingReqs);
1397 return false;
1398 }
1399 }
1400
1401 /**
1402 * handleFuncTranslationReturn is called on a TLB hit,
1403  * when a TLB miss returns, or when a page walk returns.
1404 * It updates LRU, inserts the TLB entry on a miss
1405 * depending on the allocation policy and does the required
1406 * protection checks. It does NOT create a new packet to
1407 * update the packet's addr; this is done in hsail-gpu code.
1408 */
1409 void
1410 GpuTLB::handleFuncTranslationReturn(PacketPtr pkt, tlbOutcome tlb_outcome)
1411 {
1412 TranslationState *sender_state =
1413 safe_cast<TranslationState*>(pkt->senderState);
1414
1415 ThreadContext *tc = sender_state->tc;
1416 Mode mode = sender_state->tlbMode;
1417 Addr vaddr = pkt->req->getVaddr();
1418
1419 TlbEntry *local_entry, *new_entry;
1420
1421 if (tlb_outcome == TLB_HIT) {
1422 DPRINTF(GPUTLB, "Functional Translation Done - TLB hit for addr "
1423 "%#x\n", vaddr);
1424
1425 local_entry = sender_state->tlbEntry;
1426 } else {
1427 DPRINTF(GPUTLB, "Functional Translation Done - TLB miss for addr "
1428 "%#x\n", vaddr);
1429
1430 // We are returning either from a page walk or from a hit at a lower
1431 // TLB level. The senderState should be "carrying" a pointer to the
1432 // correct TLBEntry.
1433 new_entry = sender_state->tlbEntry;
1434 assert(new_entry);
1435 local_entry = new_entry;
1436
1437 if (allocationPolicy) {
1438 Addr virt_page_addr = roundDown(vaddr, TheISA::PageBytes);
1439
1440 DPRINTF(GPUTLB, "allocating entry w/ addr %#x\n",
1441 virt_page_addr);
1442
1443 local_entry = insert(virt_page_addr, *new_entry);
1444 }
1445
1446 assert(local_entry);
1447 }
1448
1449 DPRINTF(GPUTLB, "Entry found with vaddr %#x, doing protection checks "
1450 "while paddr was %#x.\n", local_entry->vaddr,
1451 local_entry->paddr);
1452
1453 /**
1454 * Do paging checks if it's a normal functional access. If it's for a
1455 * prefetch, then sometimes you can try to prefetch something that
1456      * won't pass protection. We don't actually want to fault because there
1457 * is no demand access to deem this a violation. Just put it in the
1458 * TLB and it will fault if indeed a future demand access touches it in
1459 * violation.
1460 *
1461 * This feature could be used to explore security issues around
1462 * speculative memory accesses.
1463 */
1464 if (!sender_state->prefetch && sender_state->tlbEntry)
1465 pagingProtectionChecks(tc, pkt, local_entry, mode);
1466
1467 int page_size = local_entry->size();
1468 Addr paddr = local_entry->paddr | (vaddr & (page_size - 1));
1469 DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);
1470
1471 pkt->req->setPaddr(paddr);
1472
1473 if (local_entry->uncacheable)
1474 pkt->req->setFlags(Request::UNCACHEABLE);
1475 }
1476
1477 // This is used for atomic translations. Need to
1478 // make it all happen during the same cycle.
1479 void
1480 GpuTLB::CpuSidePort::recvFunctional(PacketPtr pkt)
1481 {
1482 TranslationState *sender_state =
1483 safe_cast<TranslationState*>(pkt->senderState);
1484
1485 ThreadContext *tc = sender_state->tc;
1486 bool update_stats = !sender_state->prefetch;
1487
1488 Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
1489 TheISA::PageBytes);
1490
1491 if (update_stats)
1492 tlb->updatePageFootprint(virt_page_addr);
1493
1494         // do the TLB lookup; stats are only updated for non-prefetch accesses
1495 bool success = tlb->tlbLookup(pkt->req, tc, update_stats);
1496 tlbOutcome tlb_outcome = success ? TLB_HIT : TLB_MISS;
1497
1498 // functional mode means no coalescing
1499 // global metrics are the same as the local metrics
1500 if (update_stats) {
1501 tlb->globalNumTLBAccesses++;
1502
1503 if (success) {
1504 sender_state->hitLevel = sender_state->reqCnt.size();
1505 tlb->globalNumTLBHits++;
1506 }
1507 }
1508
1509 if (!success) {
1510 if (update_stats)
1511 tlb->globalNumTLBMisses++;
1512 if (tlb->hasMemSidePort) {
1513 // there is a TLB below -> propagate down the TLB hierarchy
1514 tlb->memSidePort[0]->sendFunctional(pkt);
1515 // If no valid translation from a prefetch, then just return
1516 if (sender_state->prefetch && !pkt->req->hasPaddr())
1517 return;
1518 } else {
1519 // Need to access the page table and update the TLB
1520 DPRINTF(GPUTLB, "Doing a page walk for address %#x\n",
1521 virt_page_addr);
1522
1523 Process *p = tc->getProcessPtr();
1524
1525 Addr vaddr = pkt->req->getVaddr();
1526 #ifndef NDEBUG
1527 Addr alignedVaddr = p->pTable->pageAlign(vaddr);
1528 assert(alignedVaddr == virt_page_addr);
1529 #endif
1530
1531 const EmulationPageTable::Entry *pte =
1532 p->pTable->lookup(vaddr);
1533 if (!pte && sender_state->tlbMode != BaseTLB::Execute &&
1534 p->fixupStackFault(vaddr)) {
1535 pte = p->pTable->lookup(vaddr);
1536 }
1537
1538 if (!sender_state->prefetch) {
1539 // no PageFaults are permitted after
1540 // the second page table lookup
1541 assert(pte);
1542
1543 DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
1544 pte->paddr);
1545
1546 sender_state->tlbEntry =
1547 new TlbEntry(p->pid(), virt_page_addr,
1548 pte->paddr, false, false);
1549 } else {
1550 // If this was a prefetch, then do the normal thing if it
1551 // was a successful translation. Otherwise, send an empty
1552 // TLB entry back so that it can be figured out as empty and
1553 // handled accordingly.
1554 if (pte) {
1555 DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
1556 pte->paddr);
1557
1558 sender_state->tlbEntry =
1559 new TlbEntry(p->pid(), virt_page_addr,
1560 pte->paddr, false, false);
1561 } else {
1562 DPRINTF(GPUPrefetch, "Prefetch failed %#x\n",
1563 alignedVaddr);
1564
1565 sender_state->tlbEntry = nullptr;
1566
1567 return;
1568 }
1569 }
1570 }
1571 } else {
1572 DPRINTF(GPUPrefetch, "Functional Hit for vaddr %#x\n",
1573 tlb->lookup(pkt->req->getVaddr()));
1574
1575 TlbEntry *entry = tlb->lookup(pkt->req->getVaddr(),
1576 update_stats);
1577
1578 assert(entry);
1579
1580 auto p = sender_state->tc->getProcessPtr();
1581 sender_state->tlbEntry =
1582 new TlbEntry(p->pid(), entry->vaddr, entry->paddr,
1583 false, false);
1584 }
1585 // This is the function that would populate pkt->req with the paddr of
1586         // the translation. But if no translation happens (i.e., the prefetch
1587         // fails), then the early returns in the above code will keep this
1588         // function from executing.
1589 tlb->handleFuncTranslationReturn(pkt, tlb_outcome);
1590 }
1591
1592 void
1593 GpuTLB::CpuSidePort::recvReqRetry()
1594 {
1595 // The CPUSidePort never sends anything but replies. No retries
1596 // expected.
1597 panic("recvReqRetry called");
1598 }
1599
1600 AddrRangeList
1601 GpuTLB::CpuSidePort::getAddrRanges() const
1602 {
1603 // currently not checked by the master
1604 AddrRangeList ranges;
1605
1606 return ranges;
1607 }
1608
1609 /**
1610 * MemSidePort receives the packet back.
1611 * We need to call the handleTranslationReturn
1612 * and propagate up the hierarchy.
1613 */
1614 bool
1615 GpuTLB::MemSidePort::recvTimingResp(PacketPtr pkt)
1616 {
1617 Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
1618 TheISA::PageBytes);
1619
1620 DPRINTF(GPUTLB, "MemSidePort recvTiming for virt_page_addr %#x\n",
1621 virt_page_addr);
1622
1623 TLBEvent *tlb_event = tlb->translationReturnEvent[virt_page_addr];
1624 assert(tlb_event);
1625 assert(virt_page_addr == tlb_event->getTLBEventVaddr());
1626
1627 tlb_event->updateOutcome(MISS_RETURN);
1628 tlb->schedule(tlb_event, curTick()+tlb->ticks(1));
1629
1630 return true;
1631 }
1632
1633 void
1634 GpuTLB::MemSidePort::recvReqRetry()
1635 {
1636 // No retries should reach the TLB. The retries
1637 // should only reach the TLBCoalescer.
1638 panic("recvReqRetry called");
1639 }
1640
1641 void
1642 GpuTLB::cleanup()
1643 {
1644 while (!cleanupQueue.empty()) {
1645 Addr cleanup_addr = cleanupQueue.front();
1646 cleanupQueue.pop();
1647
1648 // delete TLBEvent
1649 TLBEvent * old_tlb_event = translationReturnEvent[cleanup_addr];
1650 delete old_tlb_event;
1651 translationReturnEvent.erase(cleanup_addr);
1652
1653 // update number of outstanding requests
1654 outstandingReqs--;
1655 }
1656
1657         /** The higher-level coalescer should retry if it has
1658 * any pending requests.
1659 */
1660 for (int i = 0; i < cpuSidePort.size(); ++i) {
1661 cpuSidePort[i]->sendRetryReq();
1662 }
1663 }
1664
1665 void
1666 GpuTLB::updatePageFootprint(Addr virt_page_addr)
1667 {
1668
1669 std::pair<AccessPatternTable::iterator, bool> ret;
1670
1671 AccessInfo tmp_access_info;
1672 tmp_access_info.lastTimeAccessed = 0;
1673 tmp_access_info.accessesPerPage = 0;
1674 tmp_access_info.totalReuseDistance = 0;
1675 tmp_access_info.sumDistance = 0;
1676 tmp_access_info.meanDistance = 0;
1677
1678 ret = TLBFootprint.insert(AccessPatternTable::value_type(virt_page_addr,
1679 tmp_access_info));
1680
1681 bool first_page_access = ret.second;
1682
1683 if (first_page_access) {
1684 numUniquePages++;
1685 } else {
1686 int accessed_before;
1687 accessed_before = curTick() - ret.first->second.lastTimeAccessed;
1688 ret.first->second.totalReuseDistance += accessed_before;
1689 }
1690
1691 ret.first->second.accessesPerPage++;
1692 ret.first->second.lastTimeAccessed = curTick();
1693
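    // When access-distance profiling is enabled, record the running count of
    // local TLB accesses at the time this page is touched; exitCallback()
    // later turns these samples into per-page access distances and dumps
    // them to a per-TLB .csv file.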
1694 if (accessDistance) {
1695 ret.first->second.localTLBAccesses
1696 .push_back(localNumTLBAccesses.value());
1697 }
1698 }
1699
1700 void
1701 GpuTLB::exitCallback()
1702 {
1703 std::ostream *page_stat_file = nullptr;
1704
1705 if (accessDistance) {
1706
1707             // print per-page statistics to a separate file (.csv format)
1708             // simout is the gem5 output directory (default is m5out, or the one
1709             // specified with -d)
1710 page_stat_file = simout.create(name().c_str())->stream();
1711
1712 // print header
1713             *page_stat_file << "page,max_access_distance,mean_access_distance,"
1714 << "stddev_distance" << std::endl;
1715 }
1716
1717 // update avg. reuse distance footprint
1718 AccessPatternTable::iterator iter, iter_begin, iter_end;
1719 unsigned int sum_avg_reuse_distance_per_page = 0;
1720
1721 // iterate through all pages seen by this TLB
1722 for (iter = TLBFootprint.begin(); iter != TLBFootprint.end(); iter++) {
1723 sum_avg_reuse_distance_per_page += iter->second.totalReuseDistance /
1724 iter->second.accessesPerPage;
1725
1726 if (accessDistance) {
1727 unsigned int tmp = iter->second.localTLBAccesses[0];
1728 unsigned int prev = tmp;
1729
1730 for (int i = 0; i < iter->second.localTLBAccesses.size(); ++i) {
1731 if (i) {
1732 tmp = prev + 1;
1733 }
1734
1735 prev = iter->second.localTLBAccesses[i];
1736 // update the localTLBAccesses value
1737                     // with the actual difference
1738 iter->second.localTLBAccesses[i] -= tmp;
1739 // compute the sum of AccessDistance per page
1740 // used later for mean
1741 iter->second.sumDistance +=
1742 iter->second.localTLBAccesses[i];
1743 }
1744
1745 iter->second.meanDistance =
1746 iter->second.sumDistance / iter->second.accessesPerPage;
1747
1748 // compute std_dev and max (we need a second round because we
1749                 // need to know the mean value)
1750 unsigned int max_distance = 0;
1751 unsigned int stddev_distance = 0;
1752
1753 for (int i = 0; i < iter->second.localTLBAccesses.size(); ++i) {
1754 unsigned int tmp_access_distance =
1755 iter->second.localTLBAccesses[i];
1756
1757 if (tmp_access_distance > max_distance) {
1758 max_distance = tmp_access_distance;
1759 }
1760
1761 unsigned int diff =
1762 tmp_access_distance - iter->second.meanDistance;
1763 stddev_distance += pow(diff, 2);
1764
1765 }
1766
1767 stddev_distance =
1768 sqrt(stddev_distance/iter->second.accessesPerPage);
1769
1770 if (page_stat_file) {
1771 *page_stat_file << std::hex << iter->first << ",";
1772 *page_stat_file << std::dec << max_distance << ",";
1773 *page_stat_file << std::dec << iter->second.meanDistance
1774 << ",";
1775 *page_stat_file << std::dec << stddev_distance;
1776 *page_stat_file << std::endl;
1777 }
1778
1779 // erase the localTLBAccesses array
1780 iter->second.localTLBAccesses.clear();
1781 }
1782 }
1783
1784 if (!TLBFootprint.empty()) {
1785 avgReuseDistance =
1786 sum_avg_reuse_distance_per_page / TLBFootprint.size();
1787 }
1788
1789 //clear the TLBFootprint map
1790 TLBFootprint.clear();
1791 }
1792 } // namespace X86ISA
1793
1794 X86ISA::GpuTLB*
1795 X86GPUTLBParams::create()
1796 {
1797 return new X86ISA::GpuTLB(this);
1798 }
1799