src/gpu-compute/gpu_tlb.cc

   1 /*
   2  * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
   3  * All rights reserved.
   4  *
   5  * For use for simulation and test purposes only
   6  *
   7  * Redistribution and use in source and binary forms, with or without
   8  * modification, are permitted provided that the following conditions are met:
   9  *
  10  * 1. Redistributions of source code must retain the above copyright notice,
  11  * this list of conditions and the following disclaimer.
  12  *
  13  * 2. Redistributions in binary form must reproduce the above copyright notice,
  14  * this list of conditions and the following disclaimer in the documentation
  15  * and/or other materials provided with the distribution.
  16  *
  17  * 3. Neither the name of the copyright holder nor the names of its contributors
  18  * may be used to endorse or promote products derived from this software
  19  * without specific prior written permission.
  20  *
  21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
  25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  31  * POSSIBILITY OF SUCH DAMAGE.
  32  *
  33  * Author: Lisa Hsu
  34  */
  35
  36 #include "gpu-compute/gpu_tlb.hh"
  37
  38 #include <cmath>
  39 #include <cstring>
  40
  41 #include "arch/x86/faults.hh"
  42 #include "arch/x86/insts/microldstop.hh"
  43 #include "arch/x86/pagetable.hh"
  44 #include "arch/x86/pagetable_walker.hh"
  45 #include "arch/x86/regs/misc.hh"
  46 #include "arch/x86/x86_traits.hh"
  47 #include "base/bitfield.hh"
  48 #include "base/logging.hh"
  49 #include "base/output.hh"
  50 #include "base/trace.hh"
  51 #include "cpu/base.hh"
  52 #include "cpu/thread_context.hh"
  53 #include "debug/GPUPrefetch.hh"
  54 #include "debug/GPUTLB.hh"
  55 #include "mem/packet_access.hh"
  56 #include "mem/page_table.hh"
  57 #include "mem/request.hh"
  58 #include "sim/process.hh"
  59
  60 namespace X86ISA
  61 {
  62
  63     GpuTLB::GpuTLB(const Params *p)
  64         : ClockedObject(p), configAddress(0), size(p->size),
  65           cleanupEvent([this]{ cleanup(); }, name(), false,
  66                        Event::Maximum_Pri),
  67           exitEvent([this]{ exitCallback(); }, name())
  68     {
  69         assoc = p->assoc;
  70         assert(assoc <= size);
  71         numSets = size/assoc;
  72         allocationPolicy = p->allocationPolicy;
  73         hasMemSidePort = false;
  74         accessDistance = p->accessDistance;
  75         clock = p->clk_domain->clockPeriod();
  76
  77         tlb.assign(size, TlbEntry());
  78
  79         freeList.resize(numSets);
  80         entryList.resize(numSets);
  81
  82         for (int set = 0; set < numSets; ++set) {
  83             for (int way = 0; way < assoc; ++way) {
  84                 int x = set * assoc + way;
  85                 freeList[set].push_back(&tlb.at(x));
  86             }
  87         }
  88
  89         FA = (size == assoc);
  90
  91         /**
  92          * @warning: the set-associative version assumes you have a
  93          * fixed page size of 4KB.
  94          * If the page size is greather than 4KB (as defined in the
  95          * TheISA::PageBytes), then there are various issues w/ the current
  96          * implementation (you'd have the same 8KB page being replicated in
  97          * different sets etc)
  98          */
  99         setMask = numSets - 1;
 100
 101     #if 0
 102         // GpuTLB doesn't yet support full system
 103         walker = p->walker;
 104         walker->setTLB(this);
 105     #endif
 106
 107         maxCoalescedReqs = p->maxOutstandingReqs;
 108
 109         // Do not allow maxCoalescedReqs to be more than the TLB associativity
 110         if (maxCoalescedReqs > assoc) {
 111             maxCoalescedReqs = assoc;
 112             cprintf("Forcing maxCoalescedReqs to %d (TLB assoc.) \n", assoc);
 113         }
 114
 115         outstandingReqs = 0;
 116         hitLatency = p->hitLatency;
 117         missLatency1 = p->missLatency1;
 118         missLatency2 = p->missLatency2;
 119
 120         // create the slave ports based on the number of connected ports
 121         for (size_t i = 0; i < p->port_slave_connection_count; ++i) {
 122             cpuSidePort.push_back(new CpuSidePort(csprintf("%s-port%d",
 123                                   name(), i), this, i));
 124         }
 125
 126         // create the master ports based on the number of connected ports
 127         for (size_t i = 0; i < p->port_master_connection_count; ++i) {
 128             memSidePort.push_back(new MemSidePort(csprintf("%s-port%d",
 129                                   name(), i), this, i));
 130         }
 131     }
 132
 133     // fixme: this is never called?
 134     GpuTLB::~GpuTLB()
 135     {
 136         // make sure all the hash-maps are empty
 137         assert(translationReturnEvent.empty());
 138     }
 139
 140     Port &
 141     GpuTLB::getPort(const std::string &if_name, PortID idx)
 142     {
 143         if (if_name == "slave") {
 144             if (idx >= static_cast<PortID>(cpuSidePort.size())) {
 145                 panic("TLBCoalescer::getPort: unknown index %d\n", idx);
 146             }
 147
 148             return *cpuSidePort[idx];
 149         } else if (if_name == "master") {
 150             if (idx >= static_cast<PortID>(memSidePort.size())) {
 151                 panic("TLBCoalescer::getPort: unknown index %d\n", idx);
 152             }
 153
 154             hasMemSidePort = true;
 155
 156             return *memSidePort[idx];
 157         } else {
 158             panic("TLBCoalescer::getPort: unknown port %s\n", if_name);
 159         }
 160     }
 161
 162     TlbEntry*
 163     GpuTLB::insert(Addr vpn, TlbEntry &entry)
 164     {
 165         TlbEntry *newEntry = nullptr;
 166
 167         /**
 168          * vpn holds the virtual page address
 169          * The least significant bits are simply masked
 170          */
 171         int set = (vpn >> TheISA::PageShift) & setMask;
 172
 173         if (!freeList[set].empty()) {
 174             newEntry = freeList[set].front();
 175             freeList[set].pop_front();
 176         } else {
 177             newEntry = entryList[set].back();
 178             entryList[set].pop_back();
 179         }
 180
 181         *newEntry = entry;
 182         newEntry->vaddr = vpn;
 183         entryList[set].push_front(newEntry);
 184
 185         return newEntry;
 186     }
 187
 188     GpuTLB::EntryList::iterator
 189     GpuTLB::lookupIt(Addr va, bool update_lru)
 190     {
 191         int set = (va >> TheISA::PageShift) & setMask;
 192
 193         if (FA) {
 194             assert(!set);
 195         }
 196
 197         auto entry = entryList[set].begin();
 198         for (; entry != entryList[set].end(); ++entry) {
 199             int page_size = (*entry)->size();
 200
 201             if ((*entry)->vaddr <= va && (*entry)->vaddr + page_size > va) {
 202                 DPRINTF(GPUTLB, "Matched vaddr %#x to entry starting at %#x "
 203                         "with size %#x.\n", va, (*entry)->vaddr, page_size);
 204
 205                 if (update_lru) {
 206                     entryList[set].push_front(*entry);
 207                     entryList[set].erase(entry);
 208                     entry = entryList[set].begin();
 209                 }
 210
 211                 break;
 212             }
 213         }
 214
 215         return entry;
 216     }
 217
 218     TlbEntry*
 219     GpuTLB::lookup(Addr va, bool update_lru)
 220     {
 221         int set = (va >> TheISA::PageShift) & setMask;
 222
 223         auto entry = lookupIt(va, update_lru);
 224
 225         if (entry == entryList[set].end())
 226             return nullptr;
 227         else
 228             return *entry;
 229     }
 230
 231     void
 232     GpuTLB::invalidateAll()
 233     {
 234         DPRINTF(GPUTLB, "Invalidating all entries.\n");
 235
 236         for (int i = 0; i < numSets; ++i) {
 237             while (!entryList[i].empty()) {
 238                 TlbEntry *entry = entryList[i].front();
 239                 entryList[i].pop_front();
 240                 freeList[i].push_back(entry);
 241             }
 242         }
 243     }
 244
 245     void
 246     GpuTLB::setConfigAddress(uint32_t addr)
 247     {
 248         configAddress = addr;
 249     }
 250
 251     void
 252     GpuTLB::invalidateNonGlobal()
 253     {
 254         DPRINTF(GPUTLB, "Invalidating all non global entries.\n");
 255
 256         for (int i = 0; i < numSets; ++i) {
 257             for (auto entryIt = entryList[i].begin();
 258                  entryIt != entryList[i].end();) {
 259                 if (!(*entryIt)->global) {
 260                     freeList[i].push_back(*entryIt);
 261                     entryList[i].erase(entryIt++);
 262                 } else {
 263                     ++entryIt;
 264                 }
 265             }
 266         }
 267     }
 268
 269     void
 270     GpuTLB::demapPage(Addr va, uint64_t asn)
 271     {
 272
 273         int set = (va >> TheISA::PageShift) & setMask;
 274         auto entry = lookupIt(va, false);
 275
 276         if (entry != entryList[set].end()) {
 277             freeList[set].push_back(*entry);
 278             entryList[set].erase(entry);
 279         }
 280     }
 281
 282     Fault
 283     GpuTLB::translateInt(const RequestPtr &req, ThreadContext *tc)
 284     {
 285         DPRINTF(GPUTLB, "Addresses references internal memory.\n");
 286         Addr vaddr = req->getVaddr();
 287         Addr prefix = (vaddr >> 3) & IntAddrPrefixMask;
 288
 289         if (prefix == IntAddrPrefixCPUID) {
 290             panic("CPUID memory space not yet implemented!\n");
 291         } else if (prefix == IntAddrPrefixMSR) {
 292             vaddr = vaddr >> 3;
 293             req->setFlags(Request::MMAPPED_IPR);
 294             Addr regNum = 0;
 295
 296             switch (vaddr & ~IntAddrPrefixMask) {
 297               case 0x10:
 298                 regNum = MISCREG_TSC;
 299                 break;
 300               case 0x1B:
 301                 regNum = MISCREG_APIC_BASE;
 302                 break;
 303               case 0xFE:
 304                 regNum = MISCREG_MTRRCAP;
 305                 break;
 306               case 0x174:
 307                 regNum = MISCREG_SYSENTER_CS;
 308                 break;
 309               case 0x175:
 310                 regNum = MISCREG_SYSENTER_ESP;
 311                 break;
 312               case 0x176:
 313                 regNum = MISCREG_SYSENTER_EIP;
 314                 break;
 315               case 0x179:
 316                 regNum = MISCREG_MCG_CAP;
 317                 break;
 318               case 0x17A:
 319                 regNum = MISCREG_MCG_STATUS;
 320                 break;
 321               case 0x17B:
 322                 regNum = MISCREG_MCG_CTL;
 323                 break;
 324               case 0x1D9:
 325                 regNum = MISCREG_DEBUG_CTL_MSR;
 326                 break;
 327               case 0x1DB:
 328                 regNum = MISCREG_LAST_BRANCH_FROM_IP;
 329                 break;
 330               case 0x1DC:
 331                 regNum = MISCREG_LAST_BRANCH_TO_IP;
 332                 break;
 333               case 0x1DD:
 334                 regNum = MISCREG_LAST_EXCEPTION_FROM_IP;
 335                 break;
 336               case 0x1DE:
 337                 regNum = MISCREG_LAST_EXCEPTION_TO_IP;
 338                 break;
 339               case 0x200:
 340                 regNum = MISCREG_MTRR_PHYS_BASE_0;
 341                 break;
 342               case 0x201:
 343                 regNum = MISCREG_MTRR_PHYS_MASK_0;
 344                 break;
 345               case 0x202:
 346                 regNum = MISCREG_MTRR_PHYS_BASE_1;
 347                 break;
 348               case 0x203:
 349                 regNum = MISCREG_MTRR_PHYS_MASK_1;
 350                 break;
 351               case 0x204:
 352                 regNum = MISCREG_MTRR_PHYS_BASE_2;
 353                 break;
 354               case 0x205:
 355                 regNum = MISCREG_MTRR_PHYS_MASK_2;
 356                 break;
 357               case 0x206:
 358                 regNum = MISCREG_MTRR_PHYS_BASE_3;
 359                 break;
 360               case 0x207:
 361                 regNum = MISCREG_MTRR_PHYS_MASK_3;
 362                 break;
 363               case 0x208:
 364                 regNum = MISCREG_MTRR_PHYS_BASE_4;
 365                 break;
 366               case 0x209:
 367                 regNum = MISCREG_MTRR_PHYS_MASK_4;
 368                 break;
 369               case 0x20A:
 370                 regNum = MISCREG_MTRR_PHYS_BASE_5;
 371                 break;
 372               case 0x20B:
 373                 regNum = MISCREG_MTRR_PHYS_MASK_5;
 374                 break;
 375               case 0x20C:
 376                 regNum = MISCREG_MTRR_PHYS_BASE_6;
 377                 break;
 378               case 0x20D:
 379                 regNum = MISCREG_MTRR_PHYS_MASK_6;
 380                 break;
 381               case 0x20E:
 382                 regNum = MISCREG_MTRR_PHYS_BASE_7;
 383                 break;
 384               case 0x20F:
 385                 regNum = MISCREG_MTRR_PHYS_MASK_7;
 386                 break;
 387               case 0x250:
 388                 regNum = MISCREG_MTRR_FIX_64K_00000;
 389                 break;
 390               case 0x258:
 391                 regNum = MISCREG_MTRR_FIX_16K_80000;
 392                 break;
 393               case 0x259:
 394                 regNum = MISCREG_MTRR_FIX_16K_A0000;
 395                 break;
 396               case 0x268:
 397                 regNum = MISCREG_MTRR_FIX_4K_C0000;
 398                 break;
 399               case 0x269:
 400                 regNum = MISCREG_MTRR_FIX_4K_C8000;
 401                 break;
 402               case 0x26A:
 403                 regNum = MISCREG_MTRR_FIX_4K_D0000;
 404                 break;
 405               case 0x26B:
 406                 regNum = MISCREG_MTRR_FIX_4K_D8000;
 407                 break;
 408               case 0x26C:
 409                 regNum = MISCREG_MTRR_FIX_4K_E0000;
 410                 break;
 411               case 0x26D:
 412                 regNum = MISCREG_MTRR_FIX_4K_E8000;
 413                 break;
 414               case 0x26E:
 415                 regNum = MISCREG_MTRR_FIX_4K_F0000;
 416                 break;
 417               case 0x26F:
 418                 regNum = MISCREG_MTRR_FIX_4K_F8000;
 419                 break;
 420               case 0x277:
 421                 regNum = MISCREG_PAT;
 422                 break;
 423               case 0x2FF:
 424                 regNum = MISCREG_DEF_TYPE;
 425                 break;
 426               case 0x400:
 427                 regNum = MISCREG_MC0_CTL;
 428                 break;
 429               case 0x404:
 430                 regNum = MISCREG_MC1_CTL;
 431                 break;
 432               case 0x408:
 433                 regNum = MISCREG_MC2_CTL;
 434                 break;
 435               case 0x40C:
 436                 regNum = MISCREG_MC3_CTL;
 437                 break;
 438               case 0x410:
 439                 regNum = MISCREG_MC4_CTL;
 440                 break;
 441               case 0x414:
 442                 regNum = MISCREG_MC5_CTL;
 443                 break;
 444               case 0x418:
 445                 regNum = MISCREG_MC6_CTL;
 446                 break;
 447               case 0x41C:
 448                 regNum = MISCREG_MC7_CTL;
 449                 break;
 450               case 0x401:
 451                 regNum = MISCREG_MC0_STATUS;
 452                 break;
 453               case 0x405:
 454                 regNum = MISCREG_MC1_STATUS;
 455                 break;
 456               case 0x409:
 457                 regNum = MISCREG_MC2_STATUS;
 458                 break;
 459               case 0x40D:
 460                 regNum = MISCREG_MC3_STATUS;
 461                 break;
 462               case 0x411:
 463                 regNum = MISCREG_MC4_STATUS;
 464                 break;
 465               case 0x415:
 466                 regNum = MISCREG_MC5_STATUS;
 467                 break;
 468               case 0x419:
 469                 regNum = MISCREG_MC6_STATUS;
 470                 break;
 471               case 0x41D:
 472                 regNum = MISCREG_MC7_STATUS;
 473                 break;
 474               case 0x402:
 475                 regNum = MISCREG_MC0_ADDR;
 476                 break;
 477               case 0x406:
 478                 regNum = MISCREG_MC1_ADDR;
 479                 break;
 480               case 0x40A:
 481                 regNum = MISCREG_MC2_ADDR;
 482                 break;
 483               case 0x40E:
 484                 regNum = MISCREG_MC3_ADDR;
 485                 break;
 486               case 0x412:
 487                 regNum = MISCREG_MC4_ADDR;
 488                 break;
 489               case 0x416:
 490                 regNum = MISCREG_MC5_ADDR;
 491                 break;
 492               case 0x41A:
 493                 regNum = MISCREG_MC6_ADDR;
 494                 break;
 495               case 0x41E:
 496                 regNum = MISCREG_MC7_ADDR;
 497                 break;
 498               case 0x403:
 499                 regNum = MISCREG_MC0_MISC;
 500                 break;
 501               case 0x407:
 502                 regNum = MISCREG_MC1_MISC;
 503                 break;
 504               case 0x40B:
 505                 regNum = MISCREG_MC2_MISC;
 506                 break;
 507               case 0x40F:
 508                 regNum = MISCREG_MC3_MISC;
 509                 break;
 510               case 0x413:
 511                 regNum = MISCREG_MC4_MISC;
 512                 break;
 513               case 0x417:
 514                 regNum = MISCREG_MC5_MISC;
 515                 break;
 516               case 0x41B:
 517                 regNum = MISCREG_MC6_MISC;
 518                 break;
 519               case 0x41F:
 520                 regNum = MISCREG_MC7_MISC;
 521                 break;
 522               case 0xC0000080:
 523                 regNum = MISCREG_EFER;
 524                 break;
 525               case 0xC0000081:
 526                 regNum = MISCREG_STAR;
 527                 break;
 528               case 0xC0000082:
 529                 regNum = MISCREG_LSTAR;
 530                 break;
 531               case 0xC0000083:
 532                 regNum = MISCREG_CSTAR;
 533                 break;
 534               case 0xC0000084:
 535                 regNum = MISCREG_SF_MASK;
 536                 break;
 537               case 0xC0000100:
 538                 regNum = MISCREG_FS_BASE;
 539                 break;
 540               case 0xC0000101:
 541                 regNum = MISCREG_GS_BASE;
 542                 break;
 543               case 0xC0000102:
 544                 regNum = MISCREG_KERNEL_GS_BASE;
 545                 break;
 546               case 0xC0000103:
 547                 regNum = MISCREG_TSC_AUX;
 548                 break;
 549               case 0xC0010000:
 550                 regNum = MISCREG_PERF_EVT_SEL0;
 551                 break;
 552               case 0xC0010001:
 553                 regNum = MISCREG_PERF_EVT_SEL1;
 554                 break;
 555               case 0xC0010002:
 556                 regNum = MISCREG_PERF_EVT_SEL2;
 557                 break;
 558               case 0xC0010003:
 559                 regNum = MISCREG_PERF_EVT_SEL3;
 560                 break;
 561               case 0xC0010004:
 562                 regNum = MISCREG_PERF_EVT_CTR0;
 563                 break;
 564               case 0xC0010005:
 565                 regNum = MISCREG_PERF_EVT_CTR1;
 566                 break;
 567               case 0xC0010006:
 568                 regNum = MISCREG_PERF_EVT_CTR2;
 569                 break;
 570               case 0xC0010007:
 571                 regNum = MISCREG_PERF_EVT_CTR3;
 572                 break;
 573               case 0xC0010010:
 574                 regNum = MISCREG_SYSCFG;
 575                 break;
 576               case 0xC0010016:
 577                 regNum = MISCREG_IORR_BASE0;
 578                 break;
 579               case 0xC0010017:
 580                 regNum = MISCREG_IORR_BASE1;
 581                 break;
 582               case 0xC0010018:
 583                 regNum = MISCREG_IORR_MASK0;
 584                 break;
 585               case 0xC0010019:
 586                 regNum = MISCREG_IORR_MASK1;
 587                 break;
 588               case 0xC001001A:
 589                 regNum = MISCREG_TOP_MEM;
 590                 break;
 591               case 0xC001001D:
 592                 regNum = MISCREG_TOP_MEM2;
 593                 break;
 594               case 0xC0010114:
 595                 regNum = MISCREG_VM_CR;
 596                 break;
 597               case 0xC0010115:
 598                 regNum = MISCREG_IGNNE;
 599                 break;
 600               case 0xC0010116:
 601                 regNum = MISCREG_SMM_CTL;
 602                 break;
 603               case 0xC0010117:
 604                 regNum = MISCREG_VM_HSAVE_PA;
 605                 break;
 606               default:
 607                 return std::make_shared<GeneralProtection>(0);
 608             }
 609             //The index is multiplied by the size of a MiscReg so that
 610             //any memory dependence calculations will not see these as
 611             //overlapping.
 612             req->setPaddr(regNum * sizeof(RegVal));
 613             return NoFault;
 614         } else if (prefix == IntAddrPrefixIO) {
 615             // TODO If CPL > IOPL or in virtual mode, check the I/O permission
 616             // bitmap in the TSS.
 617
 618             Addr IOPort = vaddr & ~IntAddrPrefixMask;
 619             // Make sure the address fits in the expected 16 bit IO address
 620             // space.
 621             assert(!(IOPort & ~0xFFFF));
 622
 623             if (IOPort == 0xCF8 && req->getSize() == 4) {
 624                 req->setFlags(Request::MMAPPED_IPR);
 625                 req->setPaddr(MISCREG_PCI_CONFIG_ADDRESS * sizeof(RegVal));
 626             } else if ((IOPort & ~mask(2)) == 0xCFC) {
 627                 req->setFlags(Request::UNCACHEABLE);
 628
 629                 Addr configAddress =
 630                     tc->readMiscRegNoEffect(MISCREG_PCI_CONFIG_ADDRESS);
 631
 632                 if (bits(configAddress, 31, 31)) {
 633                     req->setPaddr(PhysAddrPrefixPciConfig |
 634                                   mbits(configAddress, 30, 2) |
 635                                   (IOPort & mask(2)));
 636                 } else {
 637                     req->setPaddr(PhysAddrPrefixIO | IOPort);
 638                 }
 639             } else {
 640                 req->setFlags(Request::UNCACHEABLE);
 641                 req->setPaddr(PhysAddrPrefixIO | IOPort);
 642             }
 643             return NoFault;
 644         } else {
 645             panic("Access to unrecognized internal address space %#x.\n",
 646                   prefix);
 647         }
 648     }
 649
 650     /**
 651      * TLB_lookup will only perform a TLB lookup returning true on a TLB hit
 652      * and false on a TLB miss.
 653      * Many of the checks about different modes have been converted to
 654      * assertions, since these parts of the code are not really used.
 655      * On a hit it will update the LRU stack.
 656      */
 657     bool
 658     GpuTLB::tlbLookup(const RequestPtr &req,
 659                       ThreadContext *tc, bool update_stats)
 660     {
 661         bool tlb_hit = false;
 662     #ifndef NDEBUG
 663         uint32_t flags = req->getFlags();
 664         int seg = flags & SegmentFlagMask;
 665     #endif
 666
 667         assert(seg != SEGMENT_REG_MS);
 668         Addr vaddr = req->getVaddr();
 669         DPRINTF(GPUTLB, "TLB Lookup for vaddr %#x.\n", vaddr);
 670         HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
 671
 672         if (m5Reg.prot) {
 673             DPRINTF(GPUTLB, "In protected mode.\n");
 674             // make sure we are in 64-bit mode
 675             assert(m5Reg.mode == LongMode);
 676
 677             // If paging is enabled, do the translation.
 678             if (m5Reg.paging) {
 679                 DPRINTF(GPUTLB, "Paging enabled.\n");
 680                 //update LRU stack on a hit
 681                 TlbEntry *entry = lookup(vaddr, true);
 682
 683                 if (entry)
 684                     tlb_hit = true;
 685
 686                 if (!update_stats) {
 687                     // functional tlb access for memory initialization
 688                     // i.e., memory seeding or instr. seeding -> don't update
 689                     // TLB and stats
 690                     return tlb_hit;
 691                 }
 692
 693                 localNumTLBAccesses++;
 694
 695                 if (!entry) {
 696                     localNumTLBMisses++;
 697                 } else {
 698                     localNumTLBHits++;
 699                 }
 700             }
 701         }
 702
 703         return tlb_hit;
 704     }
 705
 706     Fault
 707     GpuTLB::translate(const RequestPtr &req, ThreadContext *tc,
 708                       Translation *translation, Mode mode,
 709                       bool &delayedResponse, bool timing, int &latency)
 710     {
 711         uint32_t flags = req->getFlags();
 712         int seg = flags & SegmentFlagMask;
 713         bool storeCheck = flags & (StoreCheck << FlagShift);
 714
 715         // If this is true, we're dealing with a request
 716         // to a non-memory address space.
 717         if (seg == SEGMENT_REG_MS) {
 718             return translateInt(req, tc);
 719         }
 720
 721         delayedResponse = false;
 722         Addr vaddr = req->getVaddr();
 723         DPRINTF(GPUTLB, "Translating vaddr %#x.\n", vaddr);
 724
 725         HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
 726
 727         // If protected mode has been enabled...
 728         if (m5Reg.prot) {
 729             DPRINTF(GPUTLB, "In protected mode.\n");
 730             // If we're not in 64-bit mode, do protection/limit checks
 731             if (m5Reg.mode != LongMode) {
 732                 DPRINTF(GPUTLB, "Not in long mode. Checking segment "
 733                         "protection.\n");
 734
 735                 // Check for a null segment selector.
 736                 if (!(seg == SEGMENT_REG_TSG || seg == SYS_SEGMENT_REG_IDTR ||
 737                     seg == SEGMENT_REG_HS || seg == SEGMENT_REG_LS)
 738                     && !tc->readMiscRegNoEffect(MISCREG_SEG_SEL(seg))) {
 739                     return std::make_shared<GeneralProtection>(0);
 740                 }
 741
 742                 bool expandDown = false;
 743                 SegAttr attr = tc->readMiscRegNoEffect(MISCREG_SEG_ATTR(seg));
 744
 745                 if (seg >= SEGMENT_REG_ES && seg <= SEGMENT_REG_HS) {
 746                     if (!attr.writable && (mode == BaseTLB::Write ||
 747                         storeCheck))
 748                         return std::make_shared<GeneralProtection>(0);
 749
 750                     if (!attr.readable && mode == BaseTLB::Read)
 751                         return std::make_shared<GeneralProtection>(0);
 752
 753                     expandDown = attr.expandDown;
 754
 755                 }
 756
 757                 Addr base = tc->readMiscRegNoEffect(MISCREG_SEG_BASE(seg));
 758                 Addr limit = tc->readMiscRegNoEffect(MISCREG_SEG_LIMIT(seg));
 759                 // This assumes we're not in 64 bit mode. If we were, the
 760                 // default address size is 64 bits, overridable to 32.
 761                 int size = 32;
 762                 bool sizeOverride = (flags & (AddrSizeFlagBit << FlagShift));
 763                 SegAttr csAttr = tc->readMiscRegNoEffect(MISCREG_CS_ATTR);
 764
 765                 if ((csAttr.defaultSize && sizeOverride) ||
 766                     (!csAttr.defaultSize && !sizeOverride)) {
 767                     size = 16;
 768                 }
 769
 770                 Addr offset = bits(vaddr - base, size - 1, 0);
 771                 Addr endOffset = offset + req->getSize() - 1;
 772
 773                 if (expandDown) {
 774                     DPRINTF(GPUTLB, "Checking an expand down segment.\n");
 775                     warn_once("Expand down segments are untested.\n");
 776
 777                     if (offset <= limit || endOffset <= limit)
 778                         return std::make_shared<GeneralProtection>(0);
 779                 } else {
 780                     if (offset > limit || endOffset > limit)
 781                         return std::make_shared<GeneralProtection>(0);
 782                 }
 783             }
 784
 785             // If paging is enabled, do the translation.
 786             if (m5Reg.paging) {
 787                 DPRINTF(GPUTLB, "Paging enabled.\n");
 788                 // The vaddr already has the segment base applied.
 789                 TlbEntry *entry = lookup(vaddr);
 790                 localNumTLBAccesses++;
 791
 792                 if (!entry) {
 793                     localNumTLBMisses++;
 794                     if (timing) {
 795                         latency = missLatency1;
 796                     }
 797
 798                     if (FullSystem) {
 799                         fatal("GpuTLB doesn't support full-system mode\n");
 800                     } else {
 801                         DPRINTF(GPUTLB, "Handling a TLB miss for address %#x "
 802                                 "at pc %#x.\n", vaddr, tc->instAddr());
 803
 804                         Process *p = tc->getProcessPtr();
 805                         const EmulationPageTable::Entry *pte =
 806                             p->pTable->lookup(vaddr);
 807
 808                         if (!pte && mode != BaseTLB::Execute) {
 809                             // penalize a "page fault" more
 810                             if (timing)
 811                                 latency += missLatency2;
 812
 813                             if (p->fixupStackFault(vaddr))
 814                                 pte = p->pTable->lookup(vaddr);
 815                         }
 816
 817                         if (!pte) {
 818                             return std::make_shared<PageFault>(vaddr, true,
 819                                                                mode, true,
 820                                                                false);
 821                         } else {
 822                             Addr alignedVaddr = p->pTable->pageAlign(vaddr);
 823
 824                             DPRINTF(GPUTLB, "Mapping %#x to %#x\n",
 825                                     alignedVaddr, pte->paddr);
 826
 827                             TlbEntry gpuEntry(p->pid(), alignedVaddr,
 828                                               pte->paddr, false, false);
 829                             entry = insert(alignedVaddr, gpuEntry);
 830                         }
 831
 832                         DPRINTF(GPUTLB, "Miss was serviced.\n");
 833                     }
 834                 } else {
 835                     localNumTLBHits++;
 836
 837                     if (timing) {
 838                         latency = hitLatency;
 839                     }
 840                 }
 841
 842                 // Do paging protection checks.
 843                 bool inUser = (m5Reg.cpl == 3 &&
 844                                !(flags & (CPL0FlagBit << FlagShift)));
 845
 846                 CR0 cr0 = tc->readMiscRegNoEffect(MISCREG_CR0);
 847                 bool badWrite = (!entry->writable && (inUser || cr0.wp));
 848
 849                 if ((inUser && !entry->user) || (mode == BaseTLB::Write &&
 850                      badWrite)) {
 851                     // The page must have been present to get into the TLB in
 852                     // the first place. We'll assume the reserved bits are
 853                     // fine even though we're not checking them.
 854                     return std::make_shared<PageFault>(vaddr, true, mode,
 855                                                        inUser, false);
 856                 }
 857
 858                 if (storeCheck && badWrite) {
 859                     // This would fault if this were a write, so return a page
 860                     // fault that reflects that happening.
 861                     return std::make_shared<PageFault>(vaddr, true,
 862                                                        BaseTLB::Write,
 863                                                        inUser, false);
 864                 }
 865
 866
 867                 DPRINTF(GPUTLB, "Entry found with paddr %#x, doing protection "
 868                         "checks.\n", entry->paddr);
 869
 870                 int page_size = entry->size();
 871                 Addr paddr = entry->paddr | (vaddr & (page_size - 1));
 872                 DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);
 873                 req->setPaddr(paddr);
 874
 875                 if (entry->uncacheable)
 876                     req->setFlags(Request::UNCACHEABLE);
 877             } else {
 878                 //Use the address which already has segmentation applied.
 879                 DPRINTF(GPUTLB, "Paging disabled.\n");
 880                 DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, vaddr);
 881                 req->setPaddr(vaddr);
 882             }
 883         } else {
 884             // Real mode
 885             DPRINTF(GPUTLB, "In real mode.\n");
 886             DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, vaddr);
 887             req->setPaddr(vaddr);
 888         }
 889
 890         // Check for an access to the local APIC
 891         if (FullSystem) {
 892             LocalApicBase localApicBase =
 893                 tc->readMiscRegNoEffect(MISCREG_APIC_BASE);
 894
 895             Addr baseAddr = localApicBase.base * PageBytes;
 896             Addr paddr = req->getPaddr();
 897
 898             if (baseAddr <= paddr && baseAddr + PageBytes > paddr) {
 899                 // Force the access to be uncacheable.
 900                 req->setFlags(Request::UNCACHEABLE);
 901                 req->setPaddr(x86LocalAPICAddress(tc->contextId(),
 902                                                   paddr - baseAddr));
 903             }
 904         }
 905
 906         return NoFault;
 907     };
 908
 909     Fault
 910     GpuTLB::translateAtomic(const RequestPtr &req, ThreadContext *tc,
 911                             Mode mode, int &latency)
 912     {
 913         bool delayedResponse;
 914
 915         return GpuTLB::translate(req, tc, nullptr, mode, delayedResponse, false,
 916                                  latency);
 917     }
 918
 919     void
 920     GpuTLB::translateTiming(const RequestPtr &req, ThreadContext *tc,
 921             Translation *translation, Mode mode, int &latency)
 922     {
 923         bool delayedResponse;
 924         assert(translation);
 925
 926         Fault fault = GpuTLB::translate(req, tc, translation, mode,
 927                                         delayedResponse, true, latency);
 928
 929         if (!delayedResponse)
 930             translation->finish(fault, req, tc, mode);
 931     }
 932
 933     Walker*
 934     GpuTLB::getWalker()
 935     {
 936         return walker;
 937     }
 938
 939
 940     void
 941     GpuTLB::serialize(CheckpointOut &cp) const
 942     {
 943     }
 944
 945     void
 946     GpuTLB::unserialize(CheckpointIn &cp)
 947     {
 948     }
 949
 950     void
 951     GpuTLB::regStats()
 952     {
 953         ClockedObject::regStats();
 954
 955         localNumTLBAccesses
 956             .name(name() + ".local_TLB_accesses")
 957             .desc("Number of TLB accesses")
 958             ;
 959
 960         localNumTLBHits
 961             .name(name() + ".local_TLB_hits")
 962             .desc("Number of TLB hits")
 963             ;
 964
 965         localNumTLBMisses
 966             .name(name() + ".local_TLB_misses")
 967             .desc("Number of TLB misses")
 968             ;
 969
 970         localTLBMissRate
 971             .name(name() + ".local_TLB_miss_rate")
 972             .desc("TLB miss rate")
 973             ;
 974
 975         accessCycles
 976             .name(name() + ".access_cycles")
 977             .desc("Cycles spent accessing this TLB level")
 978             ;
 979
 980         pageTableCycles
 981             .name(name() + ".page_table_cycles")
 982             .desc("Cycles spent accessing the page table")
 983             ;
 984
 985         localTLBMissRate = 100 * localNumTLBMisses / localNumTLBAccesses;
 986
 987         numUniquePages
 988             .name(name() + ".unique_pages")
 989             .desc("Number of unique pages touched")
 990             ;
 991
 992         localCycles
 993             .name(name() + ".local_cycles")
 994             .desc("Number of cycles spent in queue for all incoming reqs")
 995             ;
 996
 997         localLatency
 998             .name(name() + ".local_latency")
 999             .desc("Avg. latency over incoming coalesced reqs")
1000             ;
1001
1002         localLatency = localCycles / localNumTLBAccesses;
1003
1004         globalNumTLBAccesses
1005             .name(name() + ".global_TLB_accesses")
1006             .desc("Number of TLB accesses")
1007             ;
1008
1009         globalNumTLBHits
1010             .name(name() + ".global_TLB_hits")
1011             .desc("Number of TLB hits")
1012             ;
1013
1014         globalNumTLBMisses
1015             .name(name() + ".global_TLB_misses")
1016             .desc("Number of TLB misses")
1017             ;
1018
1019         globalTLBMissRate
1020             .name(name() + ".global_TLB_miss_rate")
1021             .desc("TLB miss rate")
1022             ;
1023
1024         globalTLBMissRate = 100 * globalNumTLBMisses / globalNumTLBAccesses;
1025
1026         avgReuseDistance
1027             .name(name() + ".avg_reuse_distance")
1028             .desc("avg. reuse distance over all pages (in ticks)")
1029             ;
1030
1031     }
1032
1033     /**
1034      * Do the TLB lookup for this coalesced request and schedule
1035      * another event <TLB access latency> cycles later.
1036      */
1037
1038     void
1039     GpuTLB::issueTLBLookup(PacketPtr pkt)
1040     {
1041         assert(pkt);
1042         assert(pkt->senderState);
1043
1044         Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
1045                                         TheISA::PageBytes);
1046
1047         TranslationState *sender_state =
1048                 safe_cast<TranslationState*>(pkt->senderState);
1049
1050         bool update_stats = !sender_state->prefetch;
1051         ThreadContext * tmp_tc = sender_state->tc;
1052
1053         DPRINTF(GPUTLB, "Translation req. for virt. page addr %#x\n",
1054                 virt_page_addr);
1055
1056         int req_cnt = sender_state->reqCnt.back();
1057
1058         if (update_stats) {
1059             accessCycles -= (curTick() * req_cnt);
1060             localCycles -= curTick();
1061             updatePageFootprint(virt_page_addr);
1062             globalNumTLBAccesses += req_cnt;
1063         }
1064
1065         tlbOutcome lookup_outcome = TLB_MISS;
1066         const RequestPtr &tmp_req = pkt->req;
1067
1068         // Access the TLB and figure out if it's a hit or a miss.
1069         bool success = tlbLookup(tmp_req, tmp_tc, update_stats);
1070
1071         if (success) {
1072             lookup_outcome = TLB_HIT;
1073             // Put the entry in SenderState
1074             TlbEntry *entry = lookup(tmp_req->getVaddr(), false);
1075             assert(entry);
1076
1077             auto p = sender_state->tc->getProcessPtr();
1078             sender_state->tlbEntry =
1079                 new TlbEntry(p->pid(), entry->vaddr, entry->paddr,
1080                              false, false);
1081
1082             if (update_stats) {
1083                 // the reqCnt has an entry per level, so its size tells us
1084                 // which level we are in
1085                 sender_state->hitLevel = sender_state->reqCnt.size();
1086                 globalNumTLBHits += req_cnt;
1087             }
1088         } else {
1089             if (update_stats)
1090                 globalNumTLBMisses += req_cnt;
1091         }
1092
1093         /*
1094          * We now know the TLB lookup outcome (if it's a hit or a miss), as well
1095          * as the TLB access latency.
1096          *
1097          * We create and schedule a new TLBEvent which will help us take the
1098          * appropriate actions (e.g., update TLB on a hit, send request to lower
1099          * level TLB on a miss, or start a page walk if this was the last-level
1100          * TLB)
1101          */
1102         TLBEvent *tlb_event =
1103             new TLBEvent(this, virt_page_addr, lookup_outcome, pkt);
1104
1105         if (translationReturnEvent.count(virt_page_addr)) {
1106             panic("Virtual Page Address %#x already has a return event\n",
1107                   virt_page_addr);
1108         }
1109
1110         translationReturnEvent[virt_page_addr] = tlb_event;
1111         assert(tlb_event);
1112
1113         DPRINTF(GPUTLB, "schedule translationReturnEvent @ curTick %d\n",
1114                 curTick() + this->ticks(hitLatency));
1115
1116         schedule(tlb_event, curTick() + this->ticks(hitLatency));
1117     }
1118
1119     GpuTLB::TLBEvent::TLBEvent(GpuTLB* _tlb, Addr _addr, tlbOutcome tlb_outcome,
1120                                PacketPtr _pkt)
1121         : Event(CPU_Tick_Pri), tlb(_tlb), virtPageAddr(_addr),
1122         outcome(tlb_outcome), pkt(_pkt)
1123     {
1124     }
1125
1126     /**
1127      * Do Paging protection checks. If we encounter a page fault, then
1128      * an assertion is fired.
1129      */
1130     void
1131     GpuTLB::pagingProtectionChecks(ThreadContext *tc, PacketPtr pkt,
1132             TlbEntry * tlb_entry, Mode mode)
1133     {
1134         HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
1135         uint32_t flags = pkt->req->getFlags();
1136         bool storeCheck = flags & (StoreCheck << FlagShift);
1137
1138         // Do paging protection checks.
1139         bool inUser = (m5Reg.cpl == 3 && !(flags & (CPL0FlagBit << FlagShift)));
1140         CR0 cr0 = tc->readMiscRegNoEffect(MISCREG_CR0);
1141
1142         bool badWrite = (!tlb_entry->writable && (inUser || cr0.wp));
1143
1144         if ((inUser && !tlb_entry->user) ||
1145             (mode == BaseTLB::Write && badWrite)) {
1146             // The page must have been present to get into the TLB in
1147             // the first place. We'll assume the reserved bits are
1148             // fine even though we're not checking them.
1149             panic("Page fault detected");
1150         }
1151
1152         if (storeCheck && badWrite) {
1153             // This would fault if this were a write, so return a page
1154             // fault that reflects that happening.
1155             panic("Page fault detected");
1156         }
1157     }
1158
1159     /**
1160      * handleTranslationReturn is called on a TLB hit,
1161      * when a TLB miss returns or when a page fault returns.
1162      * The latter calls handelHit with TLB miss as tlbOutcome.
1163      */
1164     void
1165     GpuTLB::handleTranslationReturn(Addr virt_page_addr, tlbOutcome tlb_outcome,
1166             PacketPtr pkt)
1167     {
1168
1169         assert(pkt);
1170         Addr vaddr = pkt->req->getVaddr();
1171
1172         TranslationState *sender_state =
1173             safe_cast<TranslationState*>(pkt->senderState);
1174
1175         ThreadContext *tc = sender_state->tc;
1176         Mode mode = sender_state->tlbMode;
1177
1178         TlbEntry *local_entry, *new_entry;
1179
1180         if (tlb_outcome == TLB_HIT) {
1181             DPRINTF(GPUTLB, "Translation Done - TLB Hit for addr %#x\n", vaddr);
1182             local_entry = sender_state->tlbEntry;
1183         } else {
1184             DPRINTF(GPUTLB, "Translation Done - TLB Miss for addr %#x\n",
1185                     vaddr);
1186
1187             // We are returning either from a page walk or from a hit at a lower
1188             // TLB level. The senderState should be "carrying" a pointer to the
1189             // correct TLBEntry.
1190             new_entry = sender_state->tlbEntry;
1191             assert(new_entry);
1192             local_entry = new_entry;
1193
1194             if (allocationPolicy) {
1195                 DPRINTF(GPUTLB, "allocating entry w/ addr %#x\n",
1196                         virt_page_addr);
1197
1198                 local_entry = insert(virt_page_addr, *new_entry);
1199             }
1200
1201             assert(local_entry);
1202         }
1203
1204         /**
1205          * At this point the packet carries an up-to-date tlbEntry pointer
1206          * in its senderState.
1207          * Next step is to do the paging protection checks.
1208          */
1209         DPRINTF(GPUTLB, "Entry found with vaddr %#x,  doing protection checks "
1210                 "while paddr was %#x.\n", local_entry->vaddr,
1211                 local_entry->paddr);
1212
1213         pagingProtectionChecks(tc, pkt, local_entry, mode);
1214         int page_size = local_entry->size();
1215         Addr paddr = local_entry->paddr | (vaddr & (page_size - 1));
1216         DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);
1217
1218         // Since this packet will be sent through the cpu side slave port,
1219         // it must be converted to a response pkt if it is not one already
1220         if (pkt->isRequest()) {
1221             pkt->makeTimingResponse();
1222         }
1223
1224         pkt->req->setPaddr(paddr);
1225
1226         if (local_entry->uncacheable) {
1227              pkt->req->setFlags(Request::UNCACHEABLE);
1228         }
1229
1230         //send packet back to coalescer
1231         cpuSidePort[0]->sendTimingResp(pkt);
1232         //schedule cleanup event
1233         cleanupQueue.push(virt_page_addr);
1234
1235         // schedule this only once per cycle.
1236         // The check is required because we might have multiple translations
1237         // returning the same cycle
1238         // this is a maximum priority event and must be on the same cycle
1239         // as the cleanup event in TLBCoalescer to avoid a race with
1240         // IssueProbeEvent caused by TLBCoalescer::MemSidePort::recvReqRetry
1241         if (!cleanupEvent.scheduled())
1242             schedule(cleanupEvent, curTick());
1243     }
1244
1245     /**
1246      * Here we take the appropriate actions based on the result of the
1247      * TLB lookup.
1248      */
1249     void
1250     GpuTLB::translationReturn(Addr virtPageAddr, tlbOutcome outcome,
1251                               PacketPtr pkt)
1252     {
1253         DPRINTF(GPUTLB, "Triggered TLBEvent for addr %#x\n", virtPageAddr);
1254
1255         assert(translationReturnEvent[virtPageAddr]);
1256         assert(pkt);
1257
1258         TranslationState *tmp_sender_state =
1259             safe_cast<TranslationState*>(pkt->senderState);
1260
1261         int req_cnt = tmp_sender_state->reqCnt.back();
1262         bool update_stats = !tmp_sender_state->prefetch;
1263
1264
1265         if (outcome == TLB_HIT) {
1266             handleTranslationReturn(virtPageAddr, TLB_HIT, pkt);
1267
1268             if (update_stats) {
1269                 accessCycles += (req_cnt * curTick());
1270                 localCycles += curTick();
1271             }
1272
1273         } else if (outcome == TLB_MISS) {
1274
1275             DPRINTF(GPUTLB, "This is a TLB miss\n");
1276             if (update_stats) {
1277                 accessCycles += (req_cnt*curTick());
1278                 localCycles += curTick();
1279             }
1280
1281             if (hasMemSidePort) {
1282                 // the one cyle added here represent the delay from when we get
1283                 // the reply back till when we propagate it to the coalescer
1284                 // above.
1285                 if (update_stats) {
1286                     accessCycles += (req_cnt * 1);
1287                     localCycles += 1;
1288                 }
1289
1290                 /**
1291                  * There is a TLB below. Send the coalesced request.
1292                  * We actually send the very first packet of all the
1293                  * pending packets for this virtual page address.
1294                  */
1295                 if (!memSidePort[0]->sendTimingReq(pkt)) {
1296                     DPRINTF(GPUTLB, "Failed sending translation request to "
1297                             "lower level TLB for addr %#x\n", virtPageAddr);
1298
1299                     memSidePort[0]->retries.push_back(pkt);
1300                 } else {
1301                     DPRINTF(GPUTLB, "Sent translation request to lower level "
1302                             "TLB for addr %#x\n", virtPageAddr);
1303                 }
1304             } else {
1305                 //this is the last level TLB. Start a page walk
1306                 DPRINTF(GPUTLB, "Last level TLB - start a page walk for "
1307                         "addr %#x\n", virtPageAddr);
1308
1309                 if (update_stats)
1310                     pageTableCycles -= (req_cnt*curTick());
1311
1312                 TLBEvent *tlb_event = translationReturnEvent[virtPageAddr];
1313                 assert(tlb_event);
1314                 tlb_event->updateOutcome(PAGE_WALK);
1315                 schedule(tlb_event, curTick() + ticks(missLatency2));
1316             }
1317         } else if (outcome == PAGE_WALK) {
1318             if (update_stats)
1319                 pageTableCycles += (req_cnt*curTick());
1320
1321             // Need to access the page table and update the TLB
1322             DPRINTF(GPUTLB, "Doing a page walk for address %#x\n",
1323                     virtPageAddr);
1324
1325             TranslationState *sender_state =
1326                 safe_cast<TranslationState*>(pkt->senderState);
1327
1328             Process *p = sender_state->tc->getProcessPtr();
1329             Addr vaddr = pkt->req->getVaddr();
1330     #ifndef NDEBUG
1331             Addr alignedVaddr = p->pTable->pageAlign(vaddr);
1332             assert(alignedVaddr == virtPageAddr);
1333     #endif
1334             const EmulationPageTable::Entry *pte = p->pTable->lookup(vaddr);
1335             if (!pte && sender_state->tlbMode != BaseTLB::Execute &&
1336                     p->fixupStackFault(vaddr)) {
1337                 pte = p->pTable->lookup(vaddr);
1338             }
1339
1340             if (pte) {
1341                 DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
1342                         pte->paddr);
1343
1344                 sender_state->tlbEntry =
1345                     new TlbEntry(p->pid(), virtPageAddr, pte->paddr, false,
1346                                  false);
1347             } else {
1348                 sender_state->tlbEntry = nullptr;
1349             }
1350
1351             handleTranslationReturn(virtPageAddr, TLB_MISS, pkt);
1352         } else if (outcome == MISS_RETURN) {
1353             /** we add an extra cycle in the return path of the translation
1354              * requests in between the various TLB levels.
1355              */
1356             handleTranslationReturn(virtPageAddr, TLB_MISS, pkt);
1357         } else {
1358             panic("Unexpected TLB outcome %d", outcome);
1359         }
1360     }
1361
1362     void
1363     GpuTLB::TLBEvent::process()
1364     {
1365         tlb->translationReturn(virtPageAddr, outcome, pkt);
1366     }
1367
1368     const char*
1369     GpuTLB::TLBEvent::description() const
1370     {
1371         return "trigger translationDoneEvent";
1372     }
1373
1374     void
1375     GpuTLB::TLBEvent::updateOutcome(tlbOutcome _outcome)
1376     {
1377         outcome = _outcome;
1378     }
1379
1380     Addr
1381     GpuTLB::TLBEvent::getTLBEventVaddr()
1382     {
1383         return virtPageAddr;
1384     }
1385
1386     /*
1387      * recvTiming receives a coalesced timing request from a TLBCoalescer
1388      * and it calls issueTLBLookup()
1389      * It only rejects the packet if we have exceeded the max
1390      * outstanding number of requests for the TLB
1391      */
1392     bool
1393     GpuTLB::CpuSidePort::recvTimingReq(PacketPtr pkt)
1394     {
1395         if (tlb->outstandingReqs < tlb->maxCoalescedReqs) {
1396             tlb->issueTLBLookup(pkt);
1397             // update number of outstanding translation requests
1398             tlb->outstandingReqs++;
1399             return true;
1400          } else {
1401             DPRINTF(GPUTLB, "Reached maxCoalescedReqs number %d\n",
1402                     tlb->outstandingReqs);
1403             return false;
1404          }
1405     }
1406
1407     /**
1408      * handleFuncTranslationReturn is called on a TLB hit,
1409      * when a TLB miss returns or when a page fault returns.
1410      * It updates LRU, inserts the TLB entry on a miss
1411      * depending on the allocation policy and does the required
1412      * protection checks. It does NOT create a new packet to
1413      * update the packet's addr; this is done in hsail-gpu code.
1414      */
1415     void
1416     GpuTLB::handleFuncTranslationReturn(PacketPtr pkt, tlbOutcome tlb_outcome)
1417     {
1418         TranslationState *sender_state =
1419             safe_cast<TranslationState*>(pkt->senderState);
1420
1421         ThreadContext *tc = sender_state->tc;
1422         Mode mode = sender_state->tlbMode;
1423         Addr vaddr = pkt->req->getVaddr();
1424
1425         TlbEntry *local_entry, *new_entry;
1426
1427         if (tlb_outcome == TLB_HIT) {
1428             DPRINTF(GPUTLB, "Functional Translation Done - TLB hit for addr "
1429                     "%#x\n", vaddr);
1430
1431             local_entry = sender_state->tlbEntry;
1432         } else {
1433             DPRINTF(GPUTLB, "Functional Translation Done - TLB miss for addr "
1434                     "%#x\n", vaddr);
1435
1436             // We are returning either from a page walk or from a hit at a lower
1437             // TLB level. The senderState should be "carrying" a pointer to the
1438             // correct TLBEntry.
1439             new_entry = sender_state->tlbEntry;
1440             assert(new_entry);
1441             local_entry = new_entry;
1442
1443             if (allocationPolicy) {
1444                 Addr virt_page_addr = roundDown(vaddr, TheISA::PageBytes);
1445
1446                 DPRINTF(GPUTLB, "allocating entry w/ addr %#x\n",
1447                         virt_page_addr);
1448
1449                 local_entry = insert(virt_page_addr, *new_entry);
1450             }
1451
1452             assert(local_entry);
1453         }
1454
1455         DPRINTF(GPUTLB, "Entry found with vaddr %#x, doing protection checks "
1456                 "while paddr was %#x.\n", local_entry->vaddr,
1457                 local_entry->paddr);
1458
1459         /**
1460          * Do paging checks if it's a normal functional access.  If it's for a
1461          * prefetch, then sometimes you can try to prefetch something that
1462          * won't pass protection. We don't actually want to fault becuase there
1463          * is no demand access to deem this a violation.  Just put it in the
1464          * TLB and it will fault if indeed a future demand access touches it in
1465          * violation.
1466          *
1467          * This feature could be used to explore security issues around
1468          * speculative memory accesses.
1469          */
1470         if (!sender_state->prefetch && sender_state->tlbEntry)
1471             pagingProtectionChecks(tc, pkt, local_entry, mode);
1472
1473         int page_size = local_entry->size();
1474         Addr paddr = local_entry->paddr | (vaddr & (page_size - 1));
1475         DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);
1476
1477         pkt->req->setPaddr(paddr);
1478
1479         if (local_entry->uncacheable)
1480              pkt->req->setFlags(Request::UNCACHEABLE);
1481     }
1482
1483     // This is used for atomic translations. Need to
1484     // make it all happen during the same cycle.
1485     void
1486     GpuTLB::CpuSidePort::recvFunctional(PacketPtr pkt)
1487     {
1488         TranslationState *sender_state =
1489             safe_cast<TranslationState*>(pkt->senderState);
1490
1491         ThreadContext *tc = sender_state->tc;
1492         bool update_stats = !sender_state->prefetch;
1493
1494         Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
1495                                         TheISA::PageBytes);
1496
1497         if (update_stats)
1498             tlb->updatePageFootprint(virt_page_addr);
1499
1500         // do the TLB lookup without updating the stats
1501         bool success = tlb->tlbLookup(pkt->req, tc, update_stats);
1502         tlbOutcome tlb_outcome = success ? TLB_HIT : TLB_MISS;
1503
1504         // functional mode means no coalescing
1505         // global metrics are the same as the local metrics
1506         if (update_stats) {
1507             tlb->globalNumTLBAccesses++;
1508
1509             if (success) {
1510                 sender_state->hitLevel = sender_state->reqCnt.size();
1511                 tlb->globalNumTLBHits++;
1512             }
1513         }
1514
1515         if (!success) {
1516             if (update_stats)
1517                 tlb->globalNumTLBMisses++;
1518             if (tlb->hasMemSidePort) {
1519                 // there is a TLB below -> propagate down the TLB hierarchy
1520                 tlb->memSidePort[0]->sendFunctional(pkt);
1521                 // If no valid translation from a prefetch, then just return
1522                 if (sender_state->prefetch && !pkt->req->hasPaddr())
1523                     return;
1524             } else {
1525                 // Need to access the page table and update the TLB
1526                 DPRINTF(GPUTLB, "Doing a page walk for address %#x\n",
1527                         virt_page_addr);
1528
1529                 Process *p = tc->getProcessPtr();
1530
1531                 Addr vaddr = pkt->req->getVaddr();
1532     #ifndef NDEBUG
1533                 Addr alignedVaddr = p->pTable->pageAlign(vaddr);
1534                 assert(alignedVaddr == virt_page_addr);
1535     #endif
1536
1537                 const EmulationPageTable::Entry *pte =
1538                         p->pTable->lookup(vaddr);
1539                 if (!pte && sender_state->tlbMode != BaseTLB::Execute &&
1540                         p->fixupStackFault(vaddr)) {
1541                     pte = p->pTable->lookup(vaddr);
1542                 }
1543
1544                 if (!sender_state->prefetch) {
1545                     // no PageFaults are permitted after
1546                     // the second page table lookup
1547                     assert(pte);
1548
1549                     DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
1550                             pte->paddr);
1551
1552                     sender_state->tlbEntry =
1553                         new TlbEntry(p->pid(), virt_page_addr,
1554                                      pte->paddr, false, false);
1555                 } else {
1556                     // If this was a prefetch, then do the normal thing if it
1557                     // was a successful translation.  Otherwise, send an empty
1558                     // TLB entry back so that it can be figured out as empty and
1559                     // handled accordingly.
1560                     if (pte) {
1561                         DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
1562                                 pte->paddr);
1563
1564                         sender_state->tlbEntry =
1565                             new TlbEntry(p->pid(), virt_page_addr,
1566                                          pte->paddr, false, false);
1567                     } else {
1568                         DPRINTF(GPUPrefetch, "Prefetch failed %#x\n",
1569                                 alignedVaddr);
1570
1571                         sender_state->tlbEntry = nullptr;
1572
1573                         return;
1574                     }
1575                 }
1576             }
1577         } else {
1578             DPRINTF(GPUPrefetch, "Functional Hit for vaddr %#x\n",
1579                     tlb->lookup(pkt->req->getVaddr()));
1580
1581             TlbEntry *entry = tlb->lookup(pkt->req->getVaddr(),
1582                                              update_stats);
1583
1584             assert(entry);
1585
1586             auto p = sender_state->tc->getProcessPtr();
1587             sender_state->tlbEntry =
1588                 new TlbEntry(p->pid(), entry->vaddr, entry->paddr,
1589                              false, false);
1590         }
1591         // This is the function that would populate pkt->req with the paddr of
1592         // the translation. But if no translation happens (i.e Prefetch fails)
1593         // then the early returns in the above code wiill keep this function
1594         // from executing.
1595         tlb->handleFuncTranslationReturn(pkt, tlb_outcome);
1596     }
1597
1598     void
1599     GpuTLB::CpuSidePort::recvReqRetry()
1600     {
1601         // The CPUSidePort never sends anything but replies. No retries
1602         // expected.
1603         panic("recvReqRetry called");
1604     }
1605
1606     AddrRangeList
1607     GpuTLB::CpuSidePort::getAddrRanges() const
1608     {
1609         // currently not checked by the master
1610         AddrRangeList ranges;
1611
1612         return ranges;
1613     }
1614
1615     /**
1616      * MemSidePort receives the packet back.
1617      * We need to call the handleTranslationReturn
1618      * and propagate up the hierarchy.
1619      */
1620     bool
1621     GpuTLB::MemSidePort::recvTimingResp(PacketPtr pkt)
1622     {
1623         Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
1624                                         TheISA::PageBytes);
1625
1626         DPRINTF(GPUTLB, "MemSidePort recvTiming for virt_page_addr %#x\n",
1627                 virt_page_addr);
1628
1629         TLBEvent *tlb_event = tlb->translationReturnEvent[virt_page_addr];
1630         assert(tlb_event);
1631         assert(virt_page_addr == tlb_event->getTLBEventVaddr());
1632
1633         tlb_event->updateOutcome(MISS_RETURN);
1634         tlb->schedule(tlb_event, curTick()+tlb->ticks(1));
1635
1636         return true;
1637     }
1638
1639     void
1640     GpuTLB::MemSidePort::recvReqRetry()
1641     {
1642         // No retries should reach the TLB. The retries
1643         // should only reach the TLBCoalescer.
1644         panic("recvReqRetry called");
1645     }
1646
1647     void
1648     GpuTLB::cleanup()
1649     {
1650         while (!cleanupQueue.empty()) {
1651             Addr cleanup_addr = cleanupQueue.front();
1652             cleanupQueue.pop();
1653
1654             // delete TLBEvent
1655             TLBEvent * old_tlb_event = translationReturnEvent[cleanup_addr];
1656             delete old_tlb_event;
1657             translationReturnEvent.erase(cleanup_addr);
1658
1659             // update number of outstanding requests
1660             outstandingReqs--;
1661         }
1662
1663         /** the higher level coalescer should retry if it has
1664          * any pending requests.
1665          */
1666         for (int i = 0; i < cpuSidePort.size(); ++i) {
1667             cpuSidePort[i]->sendRetryReq();
1668         }
1669     }
1670
1671     void
1672     GpuTLB::updatePageFootprint(Addr virt_page_addr)
1673     {
1674
1675         std::pair<AccessPatternTable::iterator, bool> ret;
1676
1677         AccessInfo tmp_access_info;
1678         tmp_access_info.lastTimeAccessed = 0;
1679         tmp_access_info.accessesPerPage = 0;
1680         tmp_access_info.totalReuseDistance = 0;
1681         tmp_access_info.sumDistance = 0;
1682         tmp_access_info.meanDistance = 0;
1683
1684         ret = TLBFootprint.insert(AccessPatternTable::value_type(virt_page_addr,
1685                                   tmp_access_info));
1686
1687         bool first_page_access = ret.second;
1688
1689         if (first_page_access) {
1690             numUniquePages++;
1691         } else  {
1692             int accessed_before;
1693             accessed_before  = curTick() - ret.first->second.lastTimeAccessed;
1694             ret.first->second.totalReuseDistance += accessed_before;
1695         }
1696
1697         ret.first->second.accessesPerPage++;
1698         ret.first->second.lastTimeAccessed = curTick();
1699
1700         if (accessDistance) {
1701             ret.first->second.localTLBAccesses
1702                 .push_back(localNumTLBAccesses.value());
1703         }
1704     }
1705
1706     void
1707     GpuTLB::exitCallback()
1708     {
1709         std::ostream *page_stat_file = nullptr;
1710
1711         if (accessDistance) {
1712
1713             // print per page statistics to a separate file (.csv format)
1714             // simout is the gem5 output directory (default is m5out or the one
1715             // specified with -d
1716             page_stat_file = simout.create(name().c_str())->stream();
1717
1718             // print header
1719             *page_stat_file << "page,max_access_distance,mean_access_distance, "
1720                             << "stddev_distance" << std::endl;
1721         }
1722
1723         // update avg. reuse distance footprint
1724         AccessPatternTable::iterator iter, iter_begin, iter_end;
1725         unsigned int sum_avg_reuse_distance_per_page = 0;
1726
1727         // iterate through all pages seen by this TLB
1728         for (iter = TLBFootprint.begin(); iter != TLBFootprint.end(); iter++) {
1729             sum_avg_reuse_distance_per_page += iter->second.totalReuseDistance /
1730                                                iter->second.accessesPerPage;
1731
1732             if (accessDistance) {
1733                 unsigned int tmp = iter->second.localTLBAccesses[0];
1734                 unsigned int prev = tmp;
1735
1736                 for (int i = 0; i < iter->second.localTLBAccesses.size(); ++i) {
1737                     if (i) {
1738                         tmp = prev + 1;
1739                     }
1740
1741                     prev = iter->second.localTLBAccesses[i];
1742                     // update the localTLBAccesses value
1743                     // with the actual differece
1744                     iter->second.localTLBAccesses[i] -= tmp;
1745                     // compute the sum of AccessDistance per page
1746                     // used later for mean
1747                     iter->second.sumDistance +=
1748                         iter->second.localTLBAccesses[i];
1749                 }
1750
1751                 iter->second.meanDistance =
1752                     iter->second.sumDistance / iter->second.accessesPerPage;
1753
1754                 // compute std_dev and max  (we need a second round because we
1755                 // need to know the mean value
1756                 unsigned int max_distance = 0;
1757                 unsigned int stddev_distance = 0;
1758
1759                 for (int i = 0; i < iter->second.localTLBAccesses.size(); ++i) {
1760                     unsigned int tmp_access_distance =
1761                         iter->second.localTLBAccesses[i];
1762
1763                     if (tmp_access_distance > max_distance) {
1764                         max_distance = tmp_access_distance;
1765                     }
1766
1767                     unsigned int diff =
1768                         tmp_access_distance - iter->second.meanDistance;
1769                     stddev_distance += pow(diff, 2);
1770
1771                 }
1772
1773                 stddev_distance =
1774                     sqrt(stddev_distance/iter->second.accessesPerPage);
1775
1776                 if (page_stat_file) {
1777                     *page_stat_file << std::hex << iter->first << ",";
1778                     *page_stat_file << std::dec << max_distance << ",";
1779                     *page_stat_file << std::dec << iter->second.meanDistance
1780                                     << ",";
1781                     *page_stat_file << std::dec << stddev_distance;
1782                     *page_stat_file << std::endl;
1783                 }
1784
1785                 // erase the localTLBAccesses array
1786                 iter->second.localTLBAccesses.clear();
1787             }
1788         }
1789
1790         if (!TLBFootprint.empty()) {
1791             avgReuseDistance =
1792                 sum_avg_reuse_distance_per_page / TLBFootprint.size();
1793         }
1794
1795         //clear the TLBFootprint map
1796         TLBFootprint.clear();
1797     }
1798 } // namespace X86ISA
1799
1800 X86ISA::GpuTLB*
1801 X86GPUTLBParams::create()
1802 {
1803     return new X86ISA::GpuTLB(this);
1804 }
1805