/*
 * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
#include "gpu-compute/gpu_tlb.hh"

#include "arch/x86/faults.hh"
#include "arch/x86/insts/microldstop.hh"
#include "arch/x86/pagetable.hh"
#include "arch/x86/pagetable_walker.hh"
#include "arch/x86/regs/misc.hh"
#include "arch/x86/x86_traits.hh"
#include "base/bitfield.hh"
#include "base/logging.hh"
#include "base/output.hh"
#include "base/trace.hh"
#include "cpu/base.hh"
#include "cpu/thread_context.hh"
#include "debug/GPUPrefetch.hh"
#include "debug/GPUTLB.hh"
#include "mem/packet_access.hh"
#include "mem/page_table.hh"
#include "mem/request.hh"
#include "sim/process.hh"

namespace X86ISA
{

    GpuTLB::GpuTLB(const Params *p)
        : MemObject(p), configAddress(0), size(p->size),
          cleanupEvent([this]{ cleanup(); }, name(), false,
                       Event::Maximum_Pri),
          exitEvent([this]{ exitCallback(); }, name())
    {
        assoc = p->assoc;
        assert(assoc <= size);
        numSets = size / assoc;
        allocationPolicy = p->allocationPolicy;
        hasMemSidePort = false;
        accessDistance = p->accessDistance;
        clock = p->clk_domain->clockPeriod();

        tlb.assign(size, TlbEntry());

        freeList.resize(numSets);
        entryList.resize(numSets);

        for (int set = 0; set < numSets; ++set) {
            for (int way = 0; way < assoc; ++way) {
                int x = set * assoc + way;
                freeList[set].push_back(&tlb.at(x));
            }
        }

        /**
         * @warning: the set-associative version assumes you have a
         * fixed page size of 4KB.
         * If the page size is greater than 4KB (as defined in
         * TheISA::PageBytes), then there are various issues w/ the current
         * implementation (you'd have the same 8KB page being replicated in
         * different sets, etc.)
         */
        setMask = numSets - 1;
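
        // Worked example (illustrative values): with 4KB pages
        // (TheISA::PageShift == 12) and, say, 64 sets, virtual address
        // 0x21000 maps to set (0x21000 >> 12) & 63 == 33.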

        // GpuTLB doesn't yet support full system
        walker->setTLB(this);

        maxCoalescedReqs = p->maxOutstandingReqs;

        // Do not allow maxCoalescedReqs to be more than the TLB associativity
        if (maxCoalescedReqs > assoc) {
            maxCoalescedReqs = assoc;
            cprintf("Forcing maxCoalescedReqs to %d (TLB assoc.)\n", assoc);
        }

        hitLatency = p->hitLatency;
        missLatency1 = p->missLatency1;
        missLatency2 = p->missLatency2;

        // create the slave ports based on the number of connected ports
        for (size_t i = 0; i < p->port_slave_connection_count; ++i) {
            cpuSidePort.push_back(new CpuSidePort(csprintf("%s-port%d",
                                  name(), i), this, i));
        }

        // create the master ports based on the number of connected ports
        for (size_t i = 0; i < p->port_master_connection_count; ++i) {
            memSidePort.push_back(new MemSidePort(csprintf("%s-port%d",
                                  name(), i), this, i));
        }
    }

    // fixme: this is never called?
    GpuTLB::~GpuTLB()
    {
        // make sure all the hash-maps are empty
        assert(translationReturnEvent.empty());
    }

    BaseSlavePort&
    GpuTLB::getSlavePort(const std::string &if_name, PortID idx)
    {
        if (if_name == "slave") {
            if (idx >= static_cast<PortID>(cpuSidePort.size())) {
                panic("TLBCoalescer::getSlavePort: unknown index %d\n", idx);
            }

            return *cpuSidePort[idx];
        } else {
            panic("TLBCoalescer::getSlavePort: unknown port %s\n", if_name);
        }
    }

    BaseMasterPort&
    GpuTLB::getMasterPort(const std::string &if_name, PortID idx)
    {
        if (if_name == "master") {
            if (idx >= static_cast<PortID>(memSidePort.size())) {
                panic("TLBCoalescer::getMasterPort: unknown index %d\n", idx);
            }

            hasMemSidePort = true;

            return *memSidePort[idx];
        } else {
            panic("TLBCoalescer::getMasterPort: unknown port %s\n", if_name);
        }
    }

    TlbEntry*
    GpuTLB::insert(Addr vpn, TlbEntry &entry)
    {
        TlbEntry *newEntry = nullptr;

        /**
         * vpn holds the virtual page address
         * The least significant bits are simply masked
         */
        int set = (vpn >> TheISA::PageShift) & setMask;

        if (!freeList[set].empty()) {
            newEntry = freeList[set].front();
            freeList[set].pop_front();
        } else {
            newEntry = entryList[set].back();
            entryList[set].pop_back();
        }

        *newEntry = entry;
        newEntry->vaddr = vpn;
        entryList[set].push_front(newEntry);

        return newEntry;
    }
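
    // Note on replacement order: entryList keeps the most recently used entry
    // at the front (lookupIt() also moves hits there), so taking back() when
    // the free list is empty evicts the least recently used way of the set.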

    GpuTLB::EntryList::iterator
    GpuTLB::lookupIt(Addr va, bool update_lru)
    {
        int set = (va >> TheISA::PageShift) & setMask;

        auto entry = entryList[set].begin();
        for (; entry != entryList[set].end(); ++entry) {
            int page_size = (*entry)->size();

            if ((*entry)->vaddr <= va && (*entry)->vaddr + page_size > va) {
                DPRINTF(GPUTLB, "Matched vaddr %#x to entry starting at %#x "
                        "with size %#x.\n", va, (*entry)->vaddr, page_size);

                if (update_lru) {
                    entryList[set].push_front(*entry);
                    entryList[set].erase(entry);
                    entry = entryList[set].begin();
                }

                break;
            }
        }

        return entry;
    }

    TlbEntry*
    GpuTLB::lookup(Addr va, bool update_lru)
    {
        int set = (va >> TheISA::PageShift) & setMask;

        auto entry = lookupIt(va, update_lru);

        if (entry == entryList[set].end())
            return nullptr;
        else
            return *entry;
    }

    void
    GpuTLB::invalidateAll()
    {
        DPRINTF(GPUTLB, "Invalidating all entries.\n");

        for (int i = 0; i < numSets; ++i) {
            while (!entryList[i].empty()) {
                TlbEntry *entry = entryList[i].front();
                entryList[i].pop_front();
                freeList[i].push_back(entry);
            }
        }
    }

    void
    GpuTLB::setConfigAddress(uint32_t addr)
    {
        configAddress = addr;
    }

    void
    GpuTLB::invalidateNonGlobal()
    {
        DPRINTF(GPUTLB, "Invalidating all non global entries.\n");

        for (int i = 0; i < numSets; ++i) {
            for (auto entryIt = entryList[i].begin();
                 entryIt != entryList[i].end();) {
                if (!(*entryIt)->global) {
                    freeList[i].push_back(*entryIt);
                    entryList[i].erase(entryIt++);
                } else {
                    ++entryIt;
                }
            }
        }
    }

    void
    GpuTLB::demapPage(Addr va, uint64_t asn)
    {
        int set = (va >> TheISA::PageShift) & setMask;
        auto entry = lookupIt(va, false);

        if (entry != entryList[set].end()) {
            freeList[set].push_back(*entry);
            entryList[set].erase(entry);
        }
    }

    Fault
    GpuTLB::translateInt(const RequestPtr &req, ThreadContext *tc)
    {
        DPRINTF(GPUTLB, "Addresses referencing internal memory.\n");
        Addr vaddr = req->getVaddr();
        Addr prefix = (vaddr >> 3) & IntAddrPrefixMask;

        if (prefix == IntAddrPrefixCPUID) {
            panic("CPUID memory space not yet implemented!\n");
        } else if (prefix == IntAddrPrefixMSR) {
            vaddr = vaddr >> 3;
            req->setFlags(Request::MMAPPED_IPR);
            Addr regNum = 0;

            switch (vaddr & ~IntAddrPrefixMask) {
              // Each assignment below sits under its own `case` label for the
              // corresponding MSR address (the labels are not reproduced in
              // this extract).
                regNum = MISCREG_TSC;
                regNum = MISCREG_APIC_BASE;
                regNum = MISCREG_MTRRCAP;
                regNum = MISCREG_SYSENTER_CS;
                regNum = MISCREG_SYSENTER_ESP;
                regNum = MISCREG_SYSENTER_EIP;
                regNum = MISCREG_MCG_CAP;
                regNum = MISCREG_MCG_STATUS;
                regNum = MISCREG_MCG_CTL;
                regNum = MISCREG_DEBUG_CTL_MSR;
                regNum = MISCREG_LAST_BRANCH_FROM_IP;
                regNum = MISCREG_LAST_BRANCH_TO_IP;
                regNum = MISCREG_LAST_EXCEPTION_FROM_IP;
                regNum = MISCREG_LAST_EXCEPTION_TO_IP;
                regNum = MISCREG_MTRR_PHYS_BASE_0;
                regNum = MISCREG_MTRR_PHYS_MASK_0;
                regNum = MISCREG_MTRR_PHYS_BASE_1;
                regNum = MISCREG_MTRR_PHYS_MASK_1;
                regNum = MISCREG_MTRR_PHYS_BASE_2;
                regNum = MISCREG_MTRR_PHYS_MASK_2;
                regNum = MISCREG_MTRR_PHYS_BASE_3;
                regNum = MISCREG_MTRR_PHYS_MASK_3;
                regNum = MISCREG_MTRR_PHYS_BASE_4;
                regNum = MISCREG_MTRR_PHYS_MASK_4;
                regNum = MISCREG_MTRR_PHYS_BASE_5;
                regNum = MISCREG_MTRR_PHYS_MASK_5;
                regNum = MISCREG_MTRR_PHYS_BASE_6;
                regNum = MISCREG_MTRR_PHYS_MASK_6;
                regNum = MISCREG_MTRR_PHYS_BASE_7;
                regNum = MISCREG_MTRR_PHYS_MASK_7;
                regNum = MISCREG_MTRR_FIX_64K_00000;
                regNum = MISCREG_MTRR_FIX_16K_80000;
                regNum = MISCREG_MTRR_FIX_16K_A0000;
                regNum = MISCREG_MTRR_FIX_4K_C0000;
                regNum = MISCREG_MTRR_FIX_4K_C8000;
                regNum = MISCREG_MTRR_FIX_4K_D0000;
                regNum = MISCREG_MTRR_FIX_4K_D8000;
                regNum = MISCREG_MTRR_FIX_4K_E0000;
                regNum = MISCREG_MTRR_FIX_4K_E8000;
                regNum = MISCREG_MTRR_FIX_4K_F0000;
                regNum = MISCREG_MTRR_FIX_4K_F8000;
                regNum = MISCREG_PAT;
                regNum = MISCREG_DEF_TYPE;
                regNum = MISCREG_MC0_CTL;
                regNum = MISCREG_MC1_CTL;
                regNum = MISCREG_MC2_CTL;
                regNum = MISCREG_MC3_CTL;
                regNum = MISCREG_MC4_CTL;
                regNum = MISCREG_MC5_CTL;
                regNum = MISCREG_MC6_CTL;
                regNum = MISCREG_MC7_CTL;
                regNum = MISCREG_MC0_STATUS;
                regNum = MISCREG_MC1_STATUS;
                regNum = MISCREG_MC2_STATUS;
                regNum = MISCREG_MC3_STATUS;
                regNum = MISCREG_MC4_STATUS;
                regNum = MISCREG_MC5_STATUS;
                regNum = MISCREG_MC6_STATUS;
                regNum = MISCREG_MC7_STATUS;
                regNum = MISCREG_MC0_ADDR;
                regNum = MISCREG_MC1_ADDR;
                regNum = MISCREG_MC2_ADDR;
                regNum = MISCREG_MC3_ADDR;
                regNum = MISCREG_MC4_ADDR;
                regNum = MISCREG_MC5_ADDR;
                regNum = MISCREG_MC6_ADDR;
                regNum = MISCREG_MC7_ADDR;
                regNum = MISCREG_MC0_MISC;
                regNum = MISCREG_MC1_MISC;
                regNum = MISCREG_MC2_MISC;
                regNum = MISCREG_MC3_MISC;
                regNum = MISCREG_MC4_MISC;
                regNum = MISCREG_MC5_MISC;
                regNum = MISCREG_MC6_MISC;
                regNum = MISCREG_MC7_MISC;
                regNum = MISCREG_EFER;
                regNum = MISCREG_STAR;
                regNum = MISCREG_LSTAR;
                regNum = MISCREG_CSTAR;
                regNum = MISCREG_SF_MASK;
                regNum = MISCREG_FS_BASE;
                regNum = MISCREG_GS_BASE;
                regNum = MISCREG_KERNEL_GS_BASE;
                regNum = MISCREG_TSC_AUX;
                regNum = MISCREG_PERF_EVT_SEL0;
                regNum = MISCREG_PERF_EVT_SEL1;
                regNum = MISCREG_PERF_EVT_SEL2;
                regNum = MISCREG_PERF_EVT_SEL3;
                regNum = MISCREG_PERF_EVT_CTR0;
                regNum = MISCREG_PERF_EVT_CTR1;
                regNum = MISCREG_PERF_EVT_CTR2;
                regNum = MISCREG_PERF_EVT_CTR3;
                regNum = MISCREG_SYSCFG;
                regNum = MISCREG_IORR_BASE0;
                regNum = MISCREG_IORR_BASE1;
                regNum = MISCREG_IORR_MASK0;
                regNum = MISCREG_IORR_MASK1;
                regNum = MISCREG_TOP_MEM;
                regNum = MISCREG_TOP_MEM2;
                regNum = MISCREG_VM_CR;
                regNum = MISCREG_IGNNE;
                regNum = MISCREG_SMM_CTL;
                regNum = MISCREG_VM_HSAVE_PA;
              default:
                return std::make_shared<GeneralProtection>(0);
            }
            // The index is multiplied by the size of a MiscReg so that
            // any memory dependence calculations will not see these as
            // overlapping.
            req->setPaddr(regNum * sizeof(RegVal));
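            // e.g., with RegVal being a 64-bit type, MISCREG index 5 yields a
            // local paddr of 5 * 8 == 40, so no two MSR mappings overlap.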
        } else if (prefix == IntAddrPrefixIO) {
            // TODO If CPL > IOPL or in virtual mode, check the I/O permission
            // bitmap in the TSS.

            Addr IOPort = vaddr & ~IntAddrPrefixMask;
            // Make sure the address fits in the expected 16 bit IO address
            // space.
            assert(!(IOPort & ~0xFFFF));

            if (IOPort == 0xCF8 && req->getSize() == 4) {
                req->setFlags(Request::MMAPPED_IPR);
                req->setPaddr(MISCREG_PCI_CONFIG_ADDRESS * sizeof(RegVal));
            } else if ((IOPort & ~mask(2)) == 0xCFC) {
                req->setFlags(Request::UNCACHEABLE);

                Addr configAddress =
                    tc->readMiscRegNoEffect(MISCREG_PCI_CONFIG_ADDRESS);

                if (bits(configAddress, 31, 31)) {
                    req->setPaddr(PhysAddrPrefixPciConfig |
                                  mbits(configAddress, 30, 2) |
                                  (IOPort & mask(2)));
                } else {
                    req->setPaddr(PhysAddrPrefixIO | IOPort);
                }
            } else {
                req->setFlags(Request::UNCACHEABLE);
                req->setPaddr(PhysAddrPrefixIO | IOPort);
            }
        } else {
            panic("Access to unrecognized internal address space %#x.\n",
                  prefix);
        }

        return NoFault;
    }

    /**
     * tlbLookup only performs a TLB lookup, returning true on a TLB hit
     * and false on a TLB miss.
     * Many of the checks about different modes have been converted to
     * assertions, since these parts of the code are not really used.
     * On a hit it will update the LRU stack.
     */
    bool
    GpuTLB::tlbLookup(const RequestPtr &req,
                      ThreadContext *tc, bool update_stats)
    {
        bool tlb_hit = false;
        uint32_t flags = req->getFlags();
        int seg = flags & SegmentFlagMask;

        assert(seg != SEGMENT_REG_MS);
        Addr vaddr = req->getVaddr();
        DPRINTF(GPUTLB, "TLB Lookup for vaddr %#x.\n", vaddr);
        HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);

        if (m5Reg.prot) {
            DPRINTF(GPUTLB, "In protected mode.\n");
            // make sure we are in 64-bit mode
            assert(m5Reg.mode == LongMode);

            // If paging is enabled, do the translation.
            if (m5Reg.paging) {
                DPRINTF(GPUTLB, "Paging enabled.\n");
                // update LRU stack on a hit
                TlbEntry *entry = lookup(vaddr, true);

                if (entry)
                    tlb_hit = true;

                if (!update_stats) {
                    // functional tlb access for memory initialization
                    // i.e., memory seeding or instr. seeding -> don't update
                    // the stats
                    return tlb_hit;
                }

                localNumTLBAccesses++;
            }
        }

        return tlb_hit;
    }
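
    // Both translation paths use tlbLookup(): issueTLBLookup() for timing
    // requests and CpuSidePort::recvFunctional() for functional/atomic ones.
    // When update_stats is false (e.g., prefetches or memory seeding), the
    // lookup leaves the access counters untouched.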

    Fault
    GpuTLB::translate(const RequestPtr &req, ThreadContext *tc,
                      Translation *translation, Mode mode,
                      bool &delayedResponse, bool timing, int &latency)
    {
        uint32_t flags = req->getFlags();
        int seg = flags & SegmentFlagMask;
        bool storeCheck = flags & (StoreCheck << FlagShift);

        // If this is true, we're dealing with a request
        // to a non-memory address space.
        if (seg == SEGMENT_REG_MS) {
            return translateInt(req, tc);
        }

        delayedResponse = false;
        Addr vaddr = req->getVaddr();
        DPRINTF(GPUTLB, "Translating vaddr %#x.\n", vaddr);

        HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);

        // If protected mode has been enabled...
        if (m5Reg.prot) {
            DPRINTF(GPUTLB, "In protected mode.\n");
            // If we're not in 64-bit mode, do protection/limit checks
            if (m5Reg.mode != LongMode) {
                DPRINTF(GPUTLB, "Not in long mode. Checking segment "
                        "protection.\n");

                // Check for a null segment selector.
                if (!(seg == SEGMENT_REG_TSG || seg == SYS_SEGMENT_REG_IDTR ||
                      seg == SEGMENT_REG_HS || seg == SEGMENT_REG_LS)
                    && !tc->readMiscRegNoEffect(MISCREG_SEG_SEL(seg))) {
                    return std::make_shared<GeneralProtection>(0);
                }

                bool expandDown = false;
                SegAttr attr = tc->readMiscRegNoEffect(MISCREG_SEG_ATTR(seg));

                if (seg >= SEGMENT_REG_ES && seg <= SEGMENT_REG_HS) {
                    if (!attr.writable && (mode == BaseTLB::Write ||
                        storeCheck))
                        return std::make_shared<GeneralProtection>(0);

                    if (!attr.readable && mode == BaseTLB::Read)
                        return std::make_shared<GeneralProtection>(0);

                    expandDown = attr.expandDown;
                }

                Addr base = tc->readMiscRegNoEffect(MISCREG_SEG_BASE(seg));
                Addr limit = tc->readMiscRegNoEffect(MISCREG_SEG_LIMIT(seg));

                // This assumes we're not in 64 bit mode. If we were, the
                // default address size is 64 bits, overridable to 32.
                int size = 32;
                bool sizeOverride = (flags & (AddrSizeFlagBit << FlagShift));
                SegAttr csAttr = tc->readMiscRegNoEffect(MISCREG_CS_ATTR);

                if ((csAttr.defaultSize && sizeOverride) ||
                    (!csAttr.defaultSize && !sizeOverride)) {
                    size = 16;
                }

                Addr offset = bits(vaddr - base, size - 1, 0);
                Addr endOffset = offset + req->getSize() - 1;

                if (expandDown) {
                    DPRINTF(GPUTLB, "Checking an expand down segment.\n");
                    warn_once("Expand down segments are untested.\n");

                    if (offset <= limit || endOffset <= limit)
                        return std::make_shared<GeneralProtection>(0);
                } else {
                    if (offset > limit || endOffset > limit)
                        return std::make_shared<GeneralProtection>(0);
                }
            }

            // If paging is enabled, do the translation.
            if (m5Reg.paging) {
                DPRINTF(GPUTLB, "Paging enabled.\n");
                // The vaddr already has the segment base applied.
                TlbEntry *entry = lookup(vaddr);
                localNumTLBAccesses++;

                if (!entry) {
                    if (timing) {
                        latency = missLatency1;
                    }

                    if (FullSystem) {
                        fatal("GpuTLB doesn't support full-system mode\n");
                    } else {
                        DPRINTF(GPUTLB, "Handling a TLB miss for address %#x "
                                "at pc %#x.\n", vaddr, tc->instAddr());

                        Process *p = tc->getProcessPtr();
                        const EmulationPageTable::Entry *pte =
                            p->pTable->lookup(vaddr);

                        if (!pte && mode != BaseTLB::Execute) {
                            // penalize a "page fault" more
                            if (timing)
                                latency += missLatency2;

                            if (p->fixupStackFault(vaddr))
                                pte = p->pTable->lookup(vaddr);
                        }

                        if (!pte) {
                            return std::make_shared<PageFault>(vaddr, true,
                                                               mode, true,
                                                               false);
                        } else {
                            Addr alignedVaddr = p->pTable->pageAlign(vaddr);

                            DPRINTF(GPUTLB, "Mapping %#x to %#x\n",
                                    alignedVaddr, pte->paddr);

                            TlbEntry gpuEntry(p->pid(), alignedVaddr,
                                              pte->paddr, false, false);
                            entry = insert(alignedVaddr, gpuEntry);
                        }

                        DPRINTF(GPUTLB, "Miss was serviced.\n");
                    }
                } else {
                    if (timing) {
                        latency = hitLatency;
                    }
                }

                // Do paging protection checks.
                bool inUser = (m5Reg.cpl == 3 &&
                               !(flags & (CPL0FlagBit << FlagShift)));

                CR0 cr0 = tc->readMiscRegNoEffect(MISCREG_CR0);
                bool badWrite = (!entry->writable && (inUser || cr0.wp));

                if ((inUser && !entry->user) || (mode == BaseTLB::Write &&
                    badWrite)) {
                    // The page must have been present to get into the TLB in
                    // the first place. We'll assume the reserved bits are
                    // fine even though we're not checking them.
                    return std::make_shared<PageFault>(vaddr, true, mode,
                                                       inUser, false);
                }

                if (storeCheck && badWrite) {
                    // This would fault if this were a write, so return a page
                    // fault that reflects that happening.
                    return std::make_shared<PageFault>(vaddr, true,
                                                       BaseTLB::Write,
                                                       inUser, false);
                }

                DPRINTF(GPUTLB, "Entry found with paddr %#x, doing protection "
                        "checks.\n", entry->paddr);

                int page_size = entry->size();
                Addr paddr = entry->paddr | (vaddr & (page_size - 1));
                DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);
                req->setPaddr(paddr);

                if (entry->uncacheable)
                    req->setFlags(Request::UNCACHEABLE);
            } else {
                // Use the address which already has segmentation applied.
                DPRINTF(GPUTLB, "Paging disabled.\n");
                DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, vaddr);
                req->setPaddr(vaddr);
            }
        } else {
            // Real mode
            DPRINTF(GPUTLB, "In real mode.\n");
            DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, vaddr);
            req->setPaddr(vaddr);
        }

        // Check for an access to the local APIC
        if (FullSystem) {
            LocalApicBase localApicBase =
                tc->readMiscRegNoEffect(MISCREG_APIC_BASE);

            Addr baseAddr = localApicBase.base * PageBytes;
            Addr paddr = req->getPaddr();

            if (baseAddr <= paddr && baseAddr + PageBytes > paddr) {
                // Force the access to be uncacheable.
                req->setFlags(Request::UNCACHEABLE);
                req->setPaddr(x86LocalAPICAddress(tc->contextId(),
                                                  paddr - baseAddr));
            }
        }

        return NoFault;
    }

    Fault
    GpuTLB::translateAtomic(const RequestPtr &req, ThreadContext *tc,
                            Mode mode, int &latency)
    {
        bool delayedResponse;

        return GpuTLB::translate(req, tc, nullptr, mode, delayedResponse,
                                 false, latency);
    }
&req
, ThreadContext
*tc
,
929 Translation
*translation
, Mode mode
, int &latency
)
931 bool delayedResponse
;
934 Fault fault
= GpuTLB::translate(req
, tc
, translation
, mode
,
935 delayedResponse
, true, latency
);
937 if (!delayedResponse
)
938 translation
->finish(fault
, req
, tc
, mode
);

    void
    GpuTLB::serialize(CheckpointOut &cp) const
    {
    }

    void
    GpuTLB::unserialize(CheckpointIn &cp)
    {
    }

    void
    GpuTLB::regStats()
    {
        MemObject::regStats();

        localNumTLBAccesses
            .name(name() + ".local_TLB_accesses")
            .desc("Number of TLB accesses")
            ;

        localNumTLBHits
            .name(name() + ".local_TLB_hits")
            .desc("Number of TLB hits")
            ;

        localNumTLBMisses
            .name(name() + ".local_TLB_misses")
            .desc("Number of TLB misses")
            ;

        localTLBMissRate
            .name(name() + ".local_TLB_miss_rate")
            .desc("TLB miss rate")
            ;

        accessCycles
            .name(name() + ".access_cycles")
            .desc("Cycles spent accessing this TLB level")
            ;

        pageTableCycles
            .name(name() + ".page_table_cycles")
            .desc("Cycles spent accessing the page table")
            ;

        localTLBMissRate = 100 * localNumTLBMisses / localNumTLBAccesses;

        numUniquePages
            .name(name() + ".unique_pages")
            .desc("Number of unique pages touched")
            ;

        localCycles
            .name(name() + ".local_cycles")
            .desc("Number of cycles spent in queue for all incoming reqs")
            ;

        localLatency
            .name(name() + ".local_latency")
            .desc("Avg. latency over incoming coalesced reqs")
            ;

        localLatency = localCycles / localNumTLBAccesses;

        globalNumTLBAccesses
            .name(name() + ".global_TLB_accesses")
            .desc("Number of TLB accesses")
            ;

        globalNumTLBHits
            .name(name() + ".global_TLB_hits")
            .desc("Number of TLB hits")
            ;

        globalNumTLBMisses
            .name(name() + ".global_TLB_misses")
            .desc("Number of TLB misses")
            ;

        globalTLBMissRate
            .name(name() + ".global_TLB_miss_rate")
            .desc("TLB miss rate")
            ;

        globalTLBMissRate = 100 * globalNumTLBMisses / globalNumTLBAccesses;

        avgReuseDistance
            .name(name() + ".avg_reuse_distance")
            .desc("avg. reuse distance over all pages (in ticks)")
            ;
    }

    /**
     * Do the TLB lookup for this coalesced request and schedule
     * another event <TLB access latency> cycles later.
     */
    void
    GpuTLB::issueTLBLookup(PacketPtr pkt)
    {
        assert(pkt->senderState);

        Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
                                        TheISA::PageBytes);

        TranslationState *sender_state =
            safe_cast<TranslationState*>(pkt->senderState);

        bool update_stats = !sender_state->prefetch;
        ThreadContext *tmp_tc = sender_state->tc;

        DPRINTF(GPUTLB, "Translation req. for virt. page addr %#x\n",
                virt_page_addr);

        int req_cnt = sender_state->reqCnt.back();

        if (update_stats) {
            accessCycles -= (curTick() * req_cnt);
            localCycles -= curTick();
            updatePageFootprint(virt_page_addr);
            globalNumTLBAccesses += req_cnt;
        }

        tlbOutcome lookup_outcome = TLB_MISS;
        const RequestPtr &tmp_req = pkt->req;

        // Access the TLB and figure out if it's a hit or a miss.
        bool success = tlbLookup(tmp_req, tmp_tc, update_stats);

        if (success) {
            lookup_outcome = TLB_HIT;
            // Put the entry in SenderState
            TlbEntry *entry = lookup(tmp_req->getVaddr(), false);
            assert(entry);

            auto p = sender_state->tc->getProcessPtr();
            sender_state->tlbEntry =
                new TlbEntry(p->pid(), entry->vaddr, entry->paddr,
                             false, false);

            if (update_stats) {
                // the reqCnt has an entry per level, so its size tells us
                // which level we are in
                sender_state->hitLevel = sender_state->reqCnt.size();
                globalNumTLBHits += req_cnt;
            }
        } else {
            if (update_stats)
                globalNumTLBMisses += req_cnt;
        }

        /*
         * We now know the TLB lookup outcome (if it's a hit or a miss), as
         * well as the TLB access latency.
         *
         * We create and schedule a new TLBEvent which will help us take the
         * appropriate actions (e.g., update TLB on a hit, send request to
         * lower level TLB on a miss, or start a page walk if this was the
         * last-level TLB).
         */
        TLBEvent *tlb_event =
            new TLBEvent(this, virt_page_addr, lookup_outcome, pkt);

        if (translationReturnEvent.count(virt_page_addr)) {
            panic("Virtual Page Address %#x already has a return event\n",
                  virt_page_addr);
        }

        translationReturnEvent[virt_page_addr] = tlb_event;

        DPRINTF(GPUTLB, "schedule translationReturnEvent @ curTick %d\n",
                curTick() + this->ticks(hitLatency));

        schedule(tlb_event, curTick() + this->ticks(hitLatency));
    }
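
    // Rough lifecycle of a timing request through this TLB: issueTLBLookup()
    // performs the lookup immediately, but the outcome is only acted on when
    // the TLBEvent scheduled above fires hitLatency ticks later and calls
    // translationReturn(), which replies to the coalescer on a hit, forwards
    // the packet to a lower TLB level on a miss, or falls back to a page walk
    // at the last level.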

    GpuTLB::TLBEvent::TLBEvent(GpuTLB *_tlb, Addr _addr,
                               tlbOutcome tlb_outcome, PacketPtr _pkt)
        : Event(CPU_Tick_Pri), tlb(_tlb), virtPageAddr(_addr),
          outcome(tlb_outcome), pkt(_pkt)
    {
    }

    /**
     * Do Paging protection checks. If we encounter a page fault, then
     * an assertion is fired.
     */
    void
    GpuTLB::pagingProtectionChecks(ThreadContext *tc, PacketPtr pkt,
                                   TlbEntry *tlb_entry, Mode mode)
    {
        HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
        uint32_t flags = pkt->req->getFlags();
        bool storeCheck = flags & (StoreCheck << FlagShift);

        // Do paging protection checks.
        bool inUser = (m5Reg.cpl == 3 &&
                       !(flags & (CPL0FlagBit << FlagShift)));
        CR0 cr0 = tc->readMiscRegNoEffect(MISCREG_CR0);

        bool badWrite = (!tlb_entry->writable && (inUser || cr0.wp));

        if ((inUser && !tlb_entry->user) ||
            (mode == BaseTLB::Write && badWrite)) {
            // The page must have been present to get into the TLB in
            // the first place. We'll assume the reserved bits are
            // fine even though we're not checking them.
            panic("Page fault detected");
        }

        if (storeCheck && badWrite) {
            // This would fault if this were a write, so return a page
            // fault that reflects that happening.
            panic("Page fault detected");
        }
    }

    /**
     * handleTranslationReturn is called on a TLB hit,
     * when a TLB miss returns, or when a page fault returns.
     * The latter invokes this handler with TLB_MISS as the tlbOutcome.
     */
    void
    GpuTLB::handleTranslationReturn(Addr virt_page_addr,
                                    tlbOutcome tlb_outcome, PacketPtr pkt)
    {
        Addr vaddr = pkt->req->getVaddr();

        TranslationState *sender_state =
            safe_cast<TranslationState*>(pkt->senderState);

        ThreadContext *tc = sender_state->tc;
        Mode mode = sender_state->tlbMode;

        TlbEntry *local_entry, *new_entry;

        if (tlb_outcome == TLB_HIT) {
            DPRINTF(GPUTLB, "Translation Done - TLB Hit for addr %#x\n",
                    vaddr);
            local_entry = sender_state->tlbEntry;
        } else {
            DPRINTF(GPUTLB, "Translation Done - TLB Miss for addr %#x\n",
                    vaddr);

            // We are returning either from a page walk or from a hit at a
            // lower TLB level. The senderState should be "carrying" a pointer
            // to the correct TLBEntry.
            new_entry = sender_state->tlbEntry;
            local_entry = new_entry;

            if (allocationPolicy) {
                DPRINTF(GPUTLB, "allocating entry w/ addr %#x\n",
                        virt_page_addr);

                local_entry = insert(virt_page_addr, *new_entry);
            }

            assert(local_entry);
        }

        /**
         * At this point the packet carries an up-to-date tlbEntry pointer
         * in its senderState.
         * Next step is to do the paging protection checks.
         */
        DPRINTF(GPUTLB, "Entry found with vaddr %#x, doing protection checks "
                "while paddr was %#x.\n", local_entry->vaddr,
                local_entry->paddr);

        pagingProtectionChecks(tc, pkt, local_entry, mode);
        int page_size = local_entry->size();
        Addr paddr = local_entry->paddr | (vaddr & (page_size - 1));
        DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);

        // Since this packet will be sent through the cpu side slave port,
        // it must be converted to a response pkt if it is not one already
        if (pkt->isRequest()) {
            pkt->makeTimingResponse();
        }

        pkt->req->setPaddr(paddr);

        if (local_entry->uncacheable) {
            pkt->req->setFlags(Request::UNCACHEABLE);
        }

        // send packet back to coalescer
        cpuSidePort[0]->sendTimingResp(pkt);
        // schedule cleanup event
        cleanupQueue.push(virt_page_addr);

        // schedule this only once per cycle.
        // The check is required because we might have multiple translations
        // returning the same cycle.
        // This is a maximum priority event and must be on the same cycle
        // as the cleanup event in TLBCoalescer to avoid a race with
        // IssueProbeEvent caused by TLBCoalescer::MemSidePort::recvReqRetry
        if (!cleanupEvent.scheduled())
            schedule(cleanupEvent, curTick());
    }

    /**
     * Here we take the appropriate actions based on the result of the
     * TLB lookup.
     */
    void
    GpuTLB::translationReturn(Addr virtPageAddr, tlbOutcome outcome,
                              PacketPtr pkt)
    {
        DPRINTF(GPUTLB, "Triggered TLBEvent for addr %#x\n", virtPageAddr);

        assert(translationReturnEvent[virtPageAddr]);

        TranslationState *tmp_sender_state =
            safe_cast<TranslationState*>(pkt->senderState);

        int req_cnt = tmp_sender_state->reqCnt.back();
        bool update_stats = !tmp_sender_state->prefetch;

        if (outcome == TLB_HIT) {
            handleTranslationReturn(virtPageAddr, TLB_HIT, pkt);

            if (update_stats) {
                accessCycles += (req_cnt * curTick());
                localCycles += curTick();
            }
        } else if (outcome == TLB_MISS) {

            DPRINTF(GPUTLB, "This is a TLB miss\n");
            if (update_stats) {
                accessCycles += (req_cnt * curTick());
                localCycles += curTick();
            }

            if (hasMemSidePort) {
                // the one cycle added here represents the delay from when we
                // get the reply back till when we propagate it to the
                // coalescer above.
                if (update_stats) {
                    accessCycles += (req_cnt * 1);
                }

                /**
                 * There is a TLB below. Send the coalesced request.
                 * We actually send the very first packet of all the
                 * pending packets for this virtual page address.
                 */
                if (!memSidePort[0]->sendTimingReq(pkt)) {
                    DPRINTF(GPUTLB, "Failed sending translation request to "
                            "lower level TLB for addr %#x\n", virtPageAddr);

                    memSidePort[0]->retries.push_back(pkt);
                } else {
                    DPRINTF(GPUTLB, "Sent translation request to lower level "
                            "TLB for addr %#x\n", virtPageAddr);
                }
            } else {
                // this is the last level TLB. Start a page walk
                DPRINTF(GPUTLB, "Last level TLB - start a page walk for "
                        "addr %#x\n", virtPageAddr);

                if (update_stats)
                    pageTableCycles -= (req_cnt * curTick());

                TLBEvent *tlb_event = translationReturnEvent[virtPageAddr];
                assert(tlb_event);
                tlb_event->updateOutcome(PAGE_WALK);
                schedule(tlb_event, curTick() + ticks(missLatency2));
            }
        } else if (outcome == PAGE_WALK) {
            if (update_stats)
                pageTableCycles += (req_cnt * curTick());

            // Need to access the page table and update the TLB
            DPRINTF(GPUTLB, "Doing a page walk for address %#x\n",
                    virtPageAddr);

            TranslationState *sender_state =
                safe_cast<TranslationState*>(pkt->senderState);

            Process *p = sender_state->tc->getProcessPtr();
            Addr vaddr = pkt->req->getVaddr();

            Addr alignedVaddr = p->pTable->pageAlign(vaddr);
            assert(alignedVaddr == virtPageAddr);

            const EmulationPageTable::Entry *pte = p->pTable->lookup(vaddr);

            if (!pte && sender_state->tlbMode != BaseTLB::Execute &&
                p->fixupStackFault(vaddr)) {
                pte = p->pTable->lookup(vaddr);
            }

            if (pte) {
                DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
                        pte->paddr);

                sender_state->tlbEntry =
                    new TlbEntry(p->pid(), virtPageAddr, pte->paddr, false,
                                 false);
            } else {
                sender_state->tlbEntry = nullptr;
            }

            handleTranslationReturn(virtPageAddr, TLB_MISS, pkt);
        } else if (outcome == MISS_RETURN) {
            /** we add an extra cycle in the return path of the translation
             * requests in between the various TLB levels.
             */
            handleTranslationReturn(virtPageAddr, TLB_MISS, pkt);
        } else {
            panic("Unexpected TLB outcome %d", outcome);
        }
    }

    void
    GpuTLB::TLBEvent::process()
    {
        tlb->translationReturn(virtPageAddr, outcome, pkt);
    }

    const char*
    GpuTLB::TLBEvent::description() const
    {
        return "trigger translationDoneEvent";
    }

    void
    GpuTLB::TLBEvent::updateOutcome(tlbOutcome _outcome)
    {
        outcome = _outcome;
    }

    Addr
    GpuTLB::TLBEvent::getTLBEventVaddr()
    {
        return virtPageAddr;
    }

    /**
     * recvTiming receives a coalesced timing request from a TLBCoalescer
     * and it calls issueTLBLookup().
     * It only rejects the packet if we have exceeded the max
     * outstanding number of requests for the TLB.
     */
    bool
    GpuTLB::CpuSidePort::recvTimingReq(PacketPtr pkt)
    {
        if (tlb->outstandingReqs < tlb->maxCoalescedReqs) {
            tlb->issueTLBLookup(pkt);
            // update number of outstanding translation requests
            tlb->outstandingReqs++;
            return true;
        } else {
            DPRINTF(GPUTLB, "Reached maxCoalescedReqs number %d\n",
                    tlb->outstandingReqs);
            return false;
        }
    }
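
    // A rejected packet is not lost: when cleanup() later retires finished
    // translations it frees outstanding-request slots and calls
    // sendRetryReq() on the CPU-side ports, prompting the coalescer to
    // resend.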

    /**
     * handleFuncTranslationReturn is called on a TLB hit,
     * when a TLB miss returns or when a page fault returns.
     * It updates LRU, inserts the TLB entry on a miss
     * depending on the allocation policy and does the required
     * protection checks. It does NOT create a new packet to
     * update the packet's addr; this is done in hsail-gpu code.
     */
    void
    GpuTLB::handleFuncTranslationReturn(PacketPtr pkt, tlbOutcome tlb_outcome)
    {
        TranslationState *sender_state =
            safe_cast<TranslationState*>(pkt->senderState);

        ThreadContext *tc = sender_state->tc;
        Mode mode = sender_state->tlbMode;
        Addr vaddr = pkt->req->getVaddr();

        TlbEntry *local_entry, *new_entry;

        if (tlb_outcome == TLB_HIT) {
            DPRINTF(GPUTLB, "Functional Translation Done - TLB hit for addr "
                    "%#x\n", vaddr);

            local_entry = sender_state->tlbEntry;
        } else {
            DPRINTF(GPUTLB, "Functional Translation Done - TLB miss for addr "
                    "%#x\n", vaddr);

            // We are returning either from a page walk or from a hit at a
            // lower TLB level. The senderState should be "carrying" a pointer
            // to the correct TLBEntry.
            new_entry = sender_state->tlbEntry;
            local_entry = new_entry;

            if (allocationPolicy) {
                Addr virt_page_addr = roundDown(vaddr, TheISA::PageBytes);

                DPRINTF(GPUTLB, "allocating entry w/ addr %#x\n",
                        virt_page_addr);

                local_entry = insert(virt_page_addr, *new_entry);
            }

            assert(local_entry);
        }

        DPRINTF(GPUTLB, "Entry found with vaddr %#x, doing protection checks "
                "while paddr was %#x.\n", local_entry->vaddr,
                local_entry->paddr);

        /**
         * Do paging checks if it's a normal functional access. If it's for a
         * prefetch, then sometimes you can try to prefetch something that
         * won't pass protection. We don't actually want to fault because
         * there is no demand access to deem this a violation. Just put it in
         * the TLB and it will fault if indeed a future demand access touches
         * it in violation.
         *
         * This feature could be used to explore security issues around
         * speculative memory accesses.
         */
        if (!sender_state->prefetch && sender_state->tlbEntry)
            pagingProtectionChecks(tc, pkt, local_entry, mode);

        int page_size = local_entry->size();
        Addr paddr = local_entry->paddr | (vaddr & (page_size - 1));
        DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);

        pkt->req->setPaddr(paddr);

        if (local_entry->uncacheable)
            pkt->req->setFlags(Request::UNCACHEABLE);
    }

    // This is used for atomic translations. Need to
    // make it all happen during the same cycle.
    void
    GpuTLB::CpuSidePort::recvFunctional(PacketPtr pkt)
    {
        TranslationState *sender_state =
            safe_cast<TranslationState*>(pkt->senderState);

        ThreadContext *tc = sender_state->tc;
        bool update_stats = !sender_state->prefetch;

        Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
                                        TheISA::PageBytes);

        if (update_stats)
            tlb->updatePageFootprint(virt_page_addr);

        // do the TLB lookup without updating the stats
        bool success = tlb->tlbLookup(pkt->req, tc, update_stats);
        tlbOutcome tlb_outcome = success ? TLB_HIT : TLB_MISS;

        // functional mode means no coalescing
        // global metrics are the same as the local metrics
        if (update_stats) {
            tlb->globalNumTLBAccesses++;

            if (success) {
                sender_state->hitLevel = sender_state->reqCnt.size();
                tlb->globalNumTLBHits++;
            }
        }

        if (!success) {
            if (update_stats)
                tlb->globalNumTLBMisses++;

            if (tlb->hasMemSidePort) {
                // there is a TLB below -> propagate down the TLB hierarchy
                tlb->memSidePort[0]->sendFunctional(pkt);

                // If no valid translation from a prefetch, then just return
                if (sender_state->prefetch && !pkt->req->hasPaddr())
                    return;
            } else {
                // Need to access the page table and update the TLB
                DPRINTF(GPUTLB, "Doing a page walk for address %#x\n",
                        virt_page_addr);

                Process *p = tc->getProcessPtr();

                Addr vaddr = pkt->req->getVaddr();

                Addr alignedVaddr = p->pTable->pageAlign(vaddr);
                assert(alignedVaddr == virt_page_addr);

                const EmulationPageTable::Entry *pte =
                    p->pTable->lookup(vaddr);

                if (!pte && sender_state->tlbMode != BaseTLB::Execute &&
                    p->fixupStackFault(vaddr)) {
                    pte = p->pTable->lookup(vaddr);
                }

                if (!sender_state->prefetch) {
                    // no PageFaults are permitted after
                    // the second page table lookup
                    assert(pte);

                    DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
                            pte->paddr);

                    sender_state->tlbEntry =
                        new TlbEntry(p->pid(), virt_page_addr,
                                     pte->paddr, false, false);
                } else {
                    // If this was a prefetch, then do the normal thing if it
                    // was a successful translation. Otherwise, send an empty
                    // TLB entry back so that it can be figured out as empty
                    // and handled accordingly.
                    if (pte) {
                        DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
                                pte->paddr);

                        sender_state->tlbEntry =
                            new TlbEntry(p->pid(), virt_page_addr,
                                         pte->paddr, false, false);
                    } else {
                        DPRINTF(GPUPrefetch, "Prefetch failed %#x\n",
                                alignedVaddr);

                        sender_state->tlbEntry = nullptr;

                        return;
                    }
                }
            }
        } else {
            DPRINTF(GPUPrefetch, "Functional Hit for vaddr %#x\n",
                    tlb->lookup(pkt->req->getVaddr()));

            TlbEntry *entry = tlb->lookup(pkt->req->getVaddr(),
                                          update_stats);
            assert(entry);

            auto p = sender_state->tc->getProcessPtr();
            sender_state->tlbEntry =
                new TlbEntry(p->pid(), entry->vaddr, entry->paddr,
                             false, false);
        }

        // This is the function that would populate pkt->req with the paddr of
        // the translation. But if no translation happens (i.e. prefetch
        // fails) then the early returns in the above code will keep this
        // function from executing.
        tlb->handleFuncTranslationReturn(pkt, tlb_outcome);
    }

    void
    GpuTLB::CpuSidePort::recvReqRetry()
    {
        // The CpuSidePort never sends anything but replies. No retries
        // expected.
        panic("recvReqRetry called");
    }

    AddrRangeList
    GpuTLB::CpuSidePort::getAddrRanges() const
    {
        // currently not checked by the master
        AddrRangeList ranges;

        return ranges;
    }

    /**
     * MemSidePort receives the packet back.
     * We need to call handleTranslationReturn
     * and propagate up the hierarchy.
     */
    bool
    GpuTLB::MemSidePort::recvTimingResp(PacketPtr pkt)
    {
        Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
                                        TheISA::PageBytes);

        DPRINTF(GPUTLB, "MemSidePort recvTiming for virt_page_addr %#x\n",
                virt_page_addr);

        TLBEvent *tlb_event = tlb->translationReturnEvent[virt_page_addr];
        assert(tlb_event);
        assert(virt_page_addr == tlb_event->getTLBEventVaddr());

        tlb_event->updateOutcome(MISS_RETURN);
        tlb->schedule(tlb_event, curTick() + tlb->ticks(1));

        return true;
    }
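
    // The extra tlb->ticks(1) above models the one-cycle delay in the return
    // path between TLB levels that translationReturn() notes for MISS_RETURN;
    // the rescheduled event then finishes via handleTranslationReturn().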

    void
    GpuTLB::MemSidePort::recvReqRetry()
    {
        // No retries should reach the TLB. The retries
        // should only reach the TLBCoalescer.
        panic("recvReqRetry called");
    }

    void
    GpuTLB::cleanup()
    {
        while (!cleanupQueue.empty()) {
            Addr cleanup_addr = cleanupQueue.front();
            cleanupQueue.pop();

            // delete the TLBEvent for this address
            TLBEvent *old_tlb_event = translationReturnEvent[cleanup_addr];
            delete old_tlb_event;
            translationReturnEvent.erase(cleanup_addr);

            // update number of outstanding requests
            outstandingReqs--;
        }

        /** the higher level coalescer should retry if it has
         * any pending requests.
         */
        for (int i = 0; i < cpuSidePort.size(); ++i) {
            cpuSidePort[i]->sendRetryReq();
        }
    }
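
    // cleanup() is driven by cleanupEvent, the maximum-priority event
    // scheduled in handleTranslationReturn(); it retires the per-page
    // TLBEvent bookkeeping, frees one outstanding-request slot per finished
    // translation, and only then asks the coalescer above to retry.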

    void
    GpuTLB::updatePageFootprint(Addr virt_page_addr)
    {
        std::pair<AccessPatternTable::iterator, bool> ret;

        AccessInfo tmp_access_info;
        tmp_access_info.lastTimeAccessed = 0;
        tmp_access_info.accessesPerPage = 0;
        tmp_access_info.totalReuseDistance = 0;
        tmp_access_info.sumDistance = 0;
        tmp_access_info.meanDistance = 0;

        ret = TLBFootprint.insert(
            AccessPatternTable::value_type(virt_page_addr, tmp_access_info));

        bool first_page_access = ret.second;

        if (first_page_access) {
            numUniquePages++;
        } else {
            int accessed_before;
            accessed_before = curTick() - ret.first->second.lastTimeAccessed;
            ret.first->second.totalReuseDistance += accessed_before;
        }

        ret.first->second.accessesPerPage++;
        ret.first->second.lastTimeAccessed = curTick();

        if (accessDistance) {
            ret.first->second.localTLBAccesses
                .push_back(localNumTLBAccesses.value());
        }
    }

    void
    GpuTLB::exitCallback()
    {
        std::ostream *page_stat_file = nullptr;

        if (accessDistance) {
            // print per page statistics to a separate file (.csv format)
            // simout is the gem5 output directory (default is m5out or the
            // one specified with -d)
            page_stat_file = simout.create(name().c_str())->stream();

            *page_stat_file << "page,max_access_distance,mean_access_distance, "
                            << "stddev_distance" << std::endl;
        }

        // update avg. reuse distance footprint
        AccessPatternTable::iterator iter, iter_begin, iter_end;
        unsigned int sum_avg_reuse_distance_per_page = 0;

        // iterate through all pages seen by this TLB
        for (iter = TLBFootprint.begin(); iter != TLBFootprint.end(); iter++) {
            sum_avg_reuse_distance_per_page +=
                iter->second.totalReuseDistance / iter->second.accessesPerPage;

            if (accessDistance) {
                unsigned int tmp = iter->second.localTLBAccesses[0];
                unsigned int prev = tmp;

                for (int i = 0; i < iter->second.localTLBAccesses.size();
                     ++i) {
                    if (i) {
                        tmp = prev + 1;
                    }

                    prev = iter->second.localTLBAccesses[i];
                    // update the localTLBAccesses value
                    // with the actual difference
                    iter->second.localTLBAccesses[i] -= tmp;
                    // compute the sum of AccessDistance per page
                    // used later for mean
                    iter->second.sumDistance +=
                        iter->second.localTLBAccesses[i];
                }

                iter->second.meanDistance =
                    iter->second.sumDistance / iter->second.accessesPerPage;

                // compute std_dev and max (we need a second round because we
                // need to know the mean value)
                unsigned int max_distance = 0;
                unsigned int stddev_distance = 0;

                for (int i = 0; i < iter->second.localTLBAccesses.size();
                     ++i) {
                    unsigned int tmp_access_distance =
                        iter->second.localTLBAccesses[i];

                    if (tmp_access_distance > max_distance) {
                        max_distance = tmp_access_distance;
                    }

                    unsigned int diff =
                        tmp_access_distance - iter->second.meanDistance;
                    stddev_distance += pow(diff, 2);
                }

                stddev_distance =
                    sqrt(stddev_distance / iter->second.accessesPerPage);

                if (page_stat_file) {
                    *page_stat_file << std::hex << iter->first << ",";
                    *page_stat_file << std::dec << max_distance << ",";
                    *page_stat_file << std::dec << iter->second.meanDistance
                                    << ",";
                    *page_stat_file << std::dec << stddev_distance;
                    *page_stat_file << std::endl;
                }

                // erase the localTLBAccesses array
                iter->second.localTLBAccesses.clear();
            }
        }

        if (!TLBFootprint.empty()) {
            avgReuseDistance =
                sum_avg_reuse_distance_per_page / TLBFootprint.size();
        }

        // clear the TLBFootprint map
        TLBFootprint.clear();
    }
} // namespace X86ISA

X86ISA::GpuTLB*
X86GPUTLBParams::create()
{
    return new X86ISA::GpuTLB(this);
}