2 * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
5 * For use for simulation and test purposes only
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
10 * 1. Redistributions of source code must retain the above copyright notice,
11 * this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright notice,
14 * this list of conditions and the following disclaimer in the documentation
15 * and/or other materials provided with the distribution.
17 * 3. Neither the name of the copyright holder nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
36 #include "gpu-compute/gpu_tlb.hh"
41 #include "arch/x86/faults.hh"
42 #include "arch/x86/insts/microldstop.hh"
43 #include "arch/x86/pagetable.hh"
44 #include "arch/x86/pagetable_walker.hh"
45 #include "arch/x86/regs/misc.hh"
46 #include "arch/x86/x86_traits.hh"
47 #include "base/bitfield.hh"
48 #include "base/output.hh"
49 #include "base/trace.hh"
50 #include "cpu/base.hh"
51 #include "cpu/thread_context.hh"
52 #include "debug/GPUPrefetch.hh"
53 #include "debug/GPUTLB.hh"
54 #include "mem/packet_access.hh"
55 #include "mem/page_table.hh"
56 #include "mem/request.hh"
57 #include "sim/process.hh"
62 GpuTLB::GpuTLB(const Params
*p
)
63 : MemObject(p
), configAddress(0), size(p
->size
),
64 cleanupEvent([this]{ cleanup(); }, name(), false,
66 exitEvent([this]{ exitCallback(); }, name())
69 assert(assoc
<= size
);
71 allocationPolicy
= p
->allocationPolicy
;
72 hasMemSidePort
= false;
73 accessDistance
= p
->accessDistance
;
74 clock
= p
->clk_domain
->clockPeriod();
76 tlb
.assign(size
, GpuTlbEntry());
78 freeList
.resize(numSets
);
79 entryList
.resize(numSets
);
81 for (int set
= 0; set
< numSets
; ++set
) {
82 for (int way
= 0; way
< assoc
; ++way
) {
83 int x
= set
* assoc
+ way
;
84 freeList
[set
].push_back(&tlb
.at(x
));
91 * @warning: the set-associative version assumes you have a
92 * fixed page size of 4KB.
93 * If the page size is greater than 4KB (as defined in the
94 * TheISA::PageBytes), then there are various issues w/ the current
95 * implementation (you'd have the same 8KB page being replicated in
98 setMask
= numSets
- 1;
101 // GpuTLB doesn't yet support full system
103 walker
->setTLB(this);
106 maxCoalescedReqs
= p
->maxOutstandingReqs
;
108 // Do not allow maxCoalescedReqs to be more than the TLB associativity
109 if (maxCoalescedReqs
> assoc
) {
110 maxCoalescedReqs
= assoc
;
111 cprintf("Forcing maxCoalescedReqs to %d (TLB assoc.) \n", assoc
);
115 hitLatency
= p
->hitLatency
;
116 missLatency1
= p
->missLatency1
;
117 missLatency2
= p
->missLatency2
;
119 // create the slave ports based on the number of connected ports
120 for (size_t i
= 0; i
< p
->port_slave_connection_count
; ++i
) {
121 cpuSidePort
.push_back(new CpuSidePort(csprintf("%s-port%d",
122 name(), i
), this, i
));
125 // create the master ports based on the number of connected ports
126 for (size_t i
= 0; i
< p
->port_master_connection_count
; ++i
) {
127 memSidePort
.push_back(new MemSidePort(csprintf("%s-port%d",
128 name(), i
), this, i
));
132 // fixme: this is never called?
135 // make sure all the hash-maps are empty
136 assert(translationReturnEvent
.empty());
// Return the CPU-side port selected by interface name and index.
// For the slave interface name this yields *cpuSidePort[idx]; an idx at or
// past cpuSidePort.size() panics with an unknown-index message.
// A second panic reports an unknown port name (presumably the else arm for
// any other if_name; the branch keyword is missing from this listing).
// NOTE(review): both panic strings say TLBCoalescer although this is a
// GpuTLB method; looks like a copy-paste leftover, confirm before editing
// the runtime text.
140 GpuTLB::getSlavePort(const std::string
&if_name
, PortID idx
)
142 if (if_name
== "slave") {
143 if (idx
>= static_cast<PortID
>(cpuSidePort
.size())) {
144 panic("TLBCoalescer::getSlavePort: unknown index %d\n", idx
);
147 return *cpuSidePort
[idx
];
149 panic("TLBCoalescer::getSlavePort: unknown port %s\n", if_name
);
// Return the memory-side port selected by interface name and index.
// For the master interface name this yields *memSidePort[idx]; an idx at or
// past memSidePort.size() panics with an unknown-index message.
// Side effect: hasMemSidePort is set to true before the port is returned,
// which translationReturn later reads to decide between forwarding a miss
// to a lower-level TLB and starting a page walk.
// A second panic reports an unknown port name (presumably the else arm; the
// branch keyword is missing from this listing).
// NOTE(review): both panic strings say TLBCoalescer although this is a
// GpuTLB method; confirm before editing the runtime text.
154 GpuTLB::getMasterPort(const std::string
&if_name
, PortID idx
)
156 if (if_name
== "master") {
157 if (idx
>= static_cast<PortID
>(memSidePort
.size())) {
158 panic("TLBCoalescer::getMasterPort: unknown index %d\n", idx
);
161 hasMemSidePort
= true;
163 return *memSidePort
[idx
];
165 panic("TLBCoalescer::getMasterPort: unknown port %s\n", if_name
);
// Claim a TLB slot for virtual page vpn and make it the most recent entry
// of its set.
// The set index is (vpn >> TheISA::PageShift) & setMask.
// A slot is taken from the front of freeList[set] when that list is not
// empty; the other visible path takes the back of entryList[set] instead
// (presumably the else arm, i.e. eviction of the oldest entry; the branch
// keyword is missing from this listing).
// The slot vaddr is set to vpn and the slot is pushed to the front of
// entryList[set].
// NOTE(review): the statement copying entry into the slot and the return
// statement are not visible here; confirm against the full file.
170 GpuTLB::insert(Addr vpn
, GpuTlbEntry
&entry
)
172 GpuTlbEntry
*newEntry
= nullptr;
175 * vpn holds the virtual page address
176 * The least significant bits are simply masked
178 int set
= (vpn
>> TheISA::PageShift
) & setMask
;
180 if (!freeList
[set
].empty()) {
181 newEntry
= freeList
[set
].front();
182 freeList
[set
].pop_front();
184 newEntry
= entryList
[set
].back();
185 entryList
[set
].pop_back();
189 newEntry
->vaddr
= vpn
;
190 entryList
[set
].push_front(newEntry
);
// Search the set selected by (va >> TheISA::PageShift) & setMask for an
// entry whose range [vaddr, vaddr + size()) contains va, and return an
// iterator into entryList[set].
// On a match the visible code re-inserts the entry at the front of the set
// list, erases the old position and repoints the iterator at begin().
// NOTE(review): the guard on update_lru around that move-to-front, the loop
// exit and the return statement are missing from this listing; presumably
// the move only happens when update_lru is true and end() is returned on a
// miss (lookup and demapPage compare the result against end()) - confirm.
195 GpuTLB::EntryList::iterator
196 GpuTLB::lookupIt(Addr va
, bool update_lru
)
198 int set
= (va
>> TheISA::PageShift
) & setMask
;
204 auto entry
= entryList
[set
].begin();
205 for (; entry
!= entryList
[set
].end(); ++entry
) {
206 int page_size
= (*entry
)->size();
208 if ((*entry
)->vaddr
<= va
&& (*entry
)->vaddr
+ page_size
> va
) {
209 DPRINTF(GPUTLB
, "Matched vaddr %#x to entry starting at %#x "
210 "with size %#x.\n", va
, (*entry
)->vaddr
, page_size
);
213 entryList
[set
].push_front(*entry
);
214 entryList
[set
].erase(entry
);
215 entry
= entryList
[set
].begin();
// Pointer-returning wrapper around lookupIt.
// Computes the same set index as lookupIt, forwards va and update_lru to
// it, and tests the result against the end of entryList[set].
// NOTE(review): both return statements are missing from this listing;
// presumably nullptr on a miss and the matched entry pointer on a hit
// (callers such as issueTLBLookup dereference the result) - confirm.
226 GpuTLB::lookup(Addr va
, bool update_lru
)
228 int set
= (va
>> TheISA::PageShift
) & setMask
;
230 auto entry
= lookupIt(va
, update_lru
);
232 if (entry
== entryList
[set
].end())
// Drop every cached translation.
// For each of the numSets sets, entries are popped from the front of
// entryList[i] one at a time and appended to freeList[i] until the entry
// list is empty, so the slots become available to insert again.
239 GpuTLB::invalidateAll()
241 DPRINTF(GPUTLB
, "Invalidating all entries.\n");
243 for (int i
= 0; i
< numSets
; ++i
) {
244 while (!entryList
[i
].empty()) {
245 GpuTlbEntry
*entry
= entryList
[i
].front();
246 entryList
[i
].pop_front();
247 freeList
[i
].push_back(entry
);
// Store addr in the configAddress member. translateInt reads that member
// when forming PCI configuration-space physical addresses.
253 GpuTLB::setConfigAddress(uint32_t addr
)
255 configAddress
= addr
;
// Drop every cached translation whose global flag is clear.
// Each set list is walked with a manually advanced iterator (the for
// statement has no increment clause). A non-global entry is appended to
// freeList[i] and erased; erase receives entryIt++ so the iterator has
// already moved on before the old position is removed (assumes EntryList
// is a node-based list where erase only invalidates the erased position -
// TODO confirm the typedef).
// NOTE(review): the arm that advances entryIt past a global entry is
// missing from this listing.
259 GpuTLB::invalidateNonGlobal()
261 DPRINTF(GPUTLB
, "Invalidating all non global entries.\n");
263 for (int i
= 0; i
< numSets
; ++i
) {
264 for (auto entryIt
= entryList
[i
].begin();
265 entryIt
!= entryList
[i
].end();) {
266 if (!(*entryIt
)->global
) {
267 freeList
[i
].push_back(*entryIt
);
268 entryList
[i
].erase(entryIt
++);
// Remove the translation covering va, if one is cached.
// The entry is located with lookupIt(va, false), so the lookup is asked not
// to update LRU order. When found, the entry pointer is appended to
// freeList[set] and its node is erased from entryList[set].
// The asn parameter is not referenced by any line visible here.
277 GpuTLB::demapPage(Addr va
, uint64_t asn
)
280 int set
= (va
>> TheISA::PageShift
) & setMask
;
281 auto entry
= lookupIt(va
, false);
283 if (entry
!= entryList
[set
].end()) {
284 freeList
[set
].push_back(*entry
);
285 entryList
[set
].erase(entry
);
290 GpuTLB::translateInt(RequestPtr req
, ThreadContext
*tc
)
292 DPRINTF(GPUTLB
, "Addresses references internal memory.\n");
293 Addr vaddr
= req
->getVaddr();
294 Addr prefix
= (vaddr
>> 3) & IntAddrPrefixMask
;
296 if (prefix
== IntAddrPrefixCPUID
) {
297 panic("CPUID memory space not yet implemented!\n");
298 } else if (prefix
== IntAddrPrefixMSR
) {
300 req
->setFlags(Request::MMAPPED_IPR
);
303 switch (vaddr
& ~IntAddrPrefixMask
) {
305 regNum
= MISCREG_TSC
;
308 regNum
= MISCREG_APIC_BASE
;
311 regNum
= MISCREG_MTRRCAP
;
314 regNum
= MISCREG_SYSENTER_CS
;
317 regNum
= MISCREG_SYSENTER_ESP
;
320 regNum
= MISCREG_SYSENTER_EIP
;
323 regNum
= MISCREG_MCG_CAP
;
326 regNum
= MISCREG_MCG_STATUS
;
329 regNum
= MISCREG_MCG_CTL
;
332 regNum
= MISCREG_DEBUG_CTL_MSR
;
335 regNum
= MISCREG_LAST_BRANCH_FROM_IP
;
338 regNum
= MISCREG_LAST_BRANCH_TO_IP
;
341 regNum
= MISCREG_LAST_EXCEPTION_FROM_IP
;
344 regNum
= MISCREG_LAST_EXCEPTION_TO_IP
;
347 regNum
= MISCREG_MTRR_PHYS_BASE_0
;
350 regNum
= MISCREG_MTRR_PHYS_MASK_0
;
353 regNum
= MISCREG_MTRR_PHYS_BASE_1
;
356 regNum
= MISCREG_MTRR_PHYS_MASK_1
;
359 regNum
= MISCREG_MTRR_PHYS_BASE_2
;
362 regNum
= MISCREG_MTRR_PHYS_MASK_2
;
365 regNum
= MISCREG_MTRR_PHYS_BASE_3
;
368 regNum
= MISCREG_MTRR_PHYS_MASK_3
;
371 regNum
= MISCREG_MTRR_PHYS_BASE_4
;
374 regNum
= MISCREG_MTRR_PHYS_MASK_4
;
377 regNum
= MISCREG_MTRR_PHYS_BASE_5
;
380 regNum
= MISCREG_MTRR_PHYS_MASK_5
;
383 regNum
= MISCREG_MTRR_PHYS_BASE_6
;
386 regNum
= MISCREG_MTRR_PHYS_MASK_6
;
389 regNum
= MISCREG_MTRR_PHYS_BASE_7
;
392 regNum
= MISCREG_MTRR_PHYS_MASK_7
;
395 regNum
= MISCREG_MTRR_FIX_64K_00000
;
398 regNum
= MISCREG_MTRR_FIX_16K_80000
;
401 regNum
= MISCREG_MTRR_FIX_16K_A0000
;
404 regNum
= MISCREG_MTRR_FIX_4K_C0000
;
407 regNum
= MISCREG_MTRR_FIX_4K_C8000
;
410 regNum
= MISCREG_MTRR_FIX_4K_D0000
;
413 regNum
= MISCREG_MTRR_FIX_4K_D8000
;
416 regNum
= MISCREG_MTRR_FIX_4K_E0000
;
419 regNum
= MISCREG_MTRR_FIX_4K_E8000
;
422 regNum
= MISCREG_MTRR_FIX_4K_F0000
;
425 regNum
= MISCREG_MTRR_FIX_4K_F8000
;
428 regNum
= MISCREG_PAT
;
431 regNum
= MISCREG_DEF_TYPE
;
434 regNum
= MISCREG_MC0_CTL
;
437 regNum
= MISCREG_MC1_CTL
;
440 regNum
= MISCREG_MC2_CTL
;
443 regNum
= MISCREG_MC3_CTL
;
446 regNum
= MISCREG_MC4_CTL
;
449 regNum
= MISCREG_MC5_CTL
;
452 regNum
= MISCREG_MC6_CTL
;
455 regNum
= MISCREG_MC7_CTL
;
458 regNum
= MISCREG_MC0_STATUS
;
461 regNum
= MISCREG_MC1_STATUS
;
464 regNum
= MISCREG_MC2_STATUS
;
467 regNum
= MISCREG_MC3_STATUS
;
470 regNum
= MISCREG_MC4_STATUS
;
473 regNum
= MISCREG_MC5_STATUS
;
476 regNum
= MISCREG_MC6_STATUS
;
479 regNum
= MISCREG_MC7_STATUS
;
482 regNum
= MISCREG_MC0_ADDR
;
485 regNum
= MISCREG_MC1_ADDR
;
488 regNum
= MISCREG_MC2_ADDR
;
491 regNum
= MISCREG_MC3_ADDR
;
494 regNum
= MISCREG_MC4_ADDR
;
497 regNum
= MISCREG_MC5_ADDR
;
500 regNum
= MISCREG_MC6_ADDR
;
503 regNum
= MISCREG_MC7_ADDR
;
506 regNum
= MISCREG_MC0_MISC
;
509 regNum
= MISCREG_MC1_MISC
;
512 regNum
= MISCREG_MC2_MISC
;
515 regNum
= MISCREG_MC3_MISC
;
518 regNum
= MISCREG_MC4_MISC
;
521 regNum
= MISCREG_MC5_MISC
;
524 regNum
= MISCREG_MC6_MISC
;
527 regNum
= MISCREG_MC7_MISC
;
530 regNum
= MISCREG_EFER
;
533 regNum
= MISCREG_STAR
;
536 regNum
= MISCREG_LSTAR
;
539 regNum
= MISCREG_CSTAR
;
542 regNum
= MISCREG_SF_MASK
;
545 regNum
= MISCREG_FS_BASE
;
548 regNum
= MISCREG_GS_BASE
;
551 regNum
= MISCREG_KERNEL_GS_BASE
;
554 regNum
= MISCREG_TSC_AUX
;
557 regNum
= MISCREG_PERF_EVT_SEL0
;
560 regNum
= MISCREG_PERF_EVT_SEL1
;
563 regNum
= MISCREG_PERF_EVT_SEL2
;
566 regNum
= MISCREG_PERF_EVT_SEL3
;
569 regNum
= MISCREG_PERF_EVT_CTR0
;
572 regNum
= MISCREG_PERF_EVT_CTR1
;
575 regNum
= MISCREG_PERF_EVT_CTR2
;
578 regNum
= MISCREG_PERF_EVT_CTR3
;
581 regNum
= MISCREG_SYSCFG
;
584 regNum
= MISCREG_IORR_BASE0
;
587 regNum
= MISCREG_IORR_BASE1
;
590 regNum
= MISCREG_IORR_MASK0
;
593 regNum
= MISCREG_IORR_MASK1
;
596 regNum
= MISCREG_TOP_MEM
;
599 regNum
= MISCREG_TOP_MEM2
;
602 regNum
= MISCREG_VM_CR
;
605 regNum
= MISCREG_IGNNE
;
608 regNum
= MISCREG_SMM_CTL
;
611 regNum
= MISCREG_VM_HSAVE_PA
;
614 return std::make_shared
<GeneralProtection
>(0);
616 //The index is multiplied by the size of a MiscReg so that
617 //any memory dependence calculations will not see these as
619 req
->setPaddr(regNum
* sizeof(MiscReg
));
621 } else if (prefix
== IntAddrPrefixIO
) {
622 // TODO If CPL > IOPL or in virtual mode, check the I/O permission
623 // bitmap in the TSS.
625 Addr IOPort
= vaddr
& ~IntAddrPrefixMask
;
626 // Make sure the address fits in the expected 16 bit IO address
628 assert(!(IOPort
& ~0xFFFF));
630 if (IOPort
== 0xCF8 && req
->getSize() == 4) {
631 req
->setFlags(Request::MMAPPED_IPR
);
632 req
->setPaddr(MISCREG_PCI_CONFIG_ADDRESS
* sizeof(MiscReg
));
633 } else if ((IOPort
& ~mask(2)) == 0xCFC) {
634 req
->setFlags(Request::UNCACHEABLE
);
637 tc
->readMiscRegNoEffect(MISCREG_PCI_CONFIG_ADDRESS
);
639 if (bits(configAddress
, 31, 31)) {
640 req
->setPaddr(PhysAddrPrefixPciConfig
|
641 mbits(configAddress
, 30, 2) |
644 req
->setPaddr(PhysAddrPrefixIO
| IOPort
);
647 req
->setFlags(Request::UNCACHEABLE
);
648 req
->setPaddr(PhysAddrPrefixIO
| IOPort
);
652 panic("Access to unrecognized internal address space %#x.\n",
658 * tlbLookup will only perform a TLB lookup, returning true on a TLB hit
659 * and false on a TLB miss.
660 * Many of the checks about different modes have been converted to
661 * assertions, since these parts of the code are not really used.
662 * On a hit it will update the LRU stack.
// Probe the TLB for the virtual address carried by req.
// Visible steps: extract the segment from the request flags and assert it
// is not SEGMENT_REG_MS, read the M5 misc register through tc, assert long
// mode, then call lookup(vaddr, true) so a hit refreshes LRU order.
// localNumTLBAccesses is incremented on a path whose guard is not visible
// (the nearby comment suggests stats are skipped for functional seeding
// accesses, presumably keyed on update_stats - confirm).
// NOTE(review): the protected-mode and paging conditions, the hit/miss
// counters and the return of tlb_hit are missing from this listing.
665 GpuTLB::tlbLookup(RequestPtr req
, ThreadContext
*tc
, bool update_stats
)
667 bool tlb_hit
= false;
669 uint32_t flags
= req
->getFlags();
670 int seg
= flags
& SegmentFlagMask
;
673 assert(seg
!= SEGMENT_REG_MS
);
674 Addr vaddr
= req
->getVaddr();
675 DPRINTF(GPUTLB
, "TLB Lookup for vaddr %#x.\n", vaddr
);
676 HandyM5Reg m5Reg
= tc
->readMiscRegNoEffect(MISCREG_M5_REG
);
679 DPRINTF(GPUTLB
, "In protected mode.\n");
680 // make sure we are in 64-bit mode
681 assert(m5Reg
.mode
== LongMode
);
683 // If paging is enabled, do the translation.
685 DPRINTF(GPUTLB
, "Paging enabled.\n");
686 //update LRU stack on a hit
687 GpuTlbEntry
*entry
= lookup(vaddr
, true);
693 // functional tlb access for memory initialization
694 // i.e., memory seeding or instr. seeding -> don't update
699 localNumTLBAccesses
++;
713 GpuTLB::translate(RequestPtr req
, ThreadContext
*tc
,
714 Translation
*translation
, Mode mode
,
715 bool &delayedResponse
, bool timing
, int &latency
)
717 uint32_t flags
= req
->getFlags();
718 int seg
= flags
& SegmentFlagMask
;
719 bool storeCheck
= flags
& (StoreCheck
<< FlagShift
);
721 // If this is true, we're dealing with a request
722 // to a non-memory address space.
723 if (seg
== SEGMENT_REG_MS
) {
724 return translateInt(req
, tc
);
727 delayedResponse
= false;
728 Addr vaddr
= req
->getVaddr();
729 DPRINTF(GPUTLB
, "Translating vaddr %#x.\n", vaddr
);
731 HandyM5Reg m5Reg
= tc
->readMiscRegNoEffect(MISCREG_M5_REG
);
733 // If protected mode has been enabled...
735 DPRINTF(GPUTLB
, "In protected mode.\n");
736 // If we're not in 64-bit mode, do protection/limit checks
737 if (m5Reg
.mode
!= LongMode
) {
738 DPRINTF(GPUTLB
, "Not in long mode. Checking segment "
741 // Check for a null segment selector.
742 if (!(seg
== SEGMENT_REG_TSG
|| seg
== SYS_SEGMENT_REG_IDTR
||
743 seg
== SEGMENT_REG_HS
|| seg
== SEGMENT_REG_LS
)
744 && !tc
->readMiscRegNoEffect(MISCREG_SEG_SEL(seg
))) {
745 return std::make_shared
<GeneralProtection
>(0);
748 bool expandDown
= false;
749 SegAttr attr
= tc
->readMiscRegNoEffect(MISCREG_SEG_ATTR(seg
));
751 if (seg
>= SEGMENT_REG_ES
&& seg
<= SEGMENT_REG_HS
) {
752 if (!attr
.writable
&& (mode
== BaseTLB::Write
||
754 return std::make_shared
<GeneralProtection
>(0);
756 if (!attr
.readable
&& mode
== BaseTLB::Read
)
757 return std::make_shared
<GeneralProtection
>(0);
759 expandDown
= attr
.expandDown
;
763 Addr base
= tc
->readMiscRegNoEffect(MISCREG_SEG_BASE(seg
));
764 Addr limit
= tc
->readMiscRegNoEffect(MISCREG_SEG_LIMIT(seg
));
765 // This assumes we're not in 64 bit mode. If we were, the
766 // default address size is 64 bits, overridable to 32.
768 bool sizeOverride
= (flags
& (AddrSizeFlagBit
<< FlagShift
));
769 SegAttr csAttr
= tc
->readMiscRegNoEffect(MISCREG_CS_ATTR
);
771 if ((csAttr
.defaultSize
&& sizeOverride
) ||
772 (!csAttr
.defaultSize
&& !sizeOverride
)) {
776 Addr offset
= bits(vaddr
- base
, size
- 1, 0);
777 Addr endOffset
= offset
+ req
->getSize() - 1;
780 DPRINTF(GPUTLB
, "Checking an expand down segment.\n");
781 warn_once("Expand down segments are untested.\n");
783 if (offset
<= limit
|| endOffset
<= limit
)
784 return std::make_shared
<GeneralProtection
>(0);
786 if (offset
> limit
|| endOffset
> limit
)
787 return std::make_shared
<GeneralProtection
>(0);
791 // If paging is enabled, do the translation.
793 DPRINTF(GPUTLB
, "Paging enabled.\n");
794 // The vaddr already has the segment base applied.
795 GpuTlbEntry
*entry
= lookup(vaddr
);
796 localNumTLBAccesses
++;
801 latency
= missLatency1
;
805 fatal("GpuTLB doesn't support full-system mode\n");
807 DPRINTF(GPUTLB
, "Handling a TLB miss for address %#x "
808 "at pc %#x.\n", vaddr
, tc
->instAddr());
810 Process
*p
= tc
->getProcessPtr();
811 const EmulationPageTable::Entry
*pte
=
812 p
->pTable
->lookup(vaddr
);
814 if (!pte
&& mode
!= BaseTLB::Execute
) {
815 // penalize a "page fault" more
817 latency
+= missLatency2
;
819 if (p
->fixupStackFault(vaddr
))
820 pte
= p
->pTable
->lookup(vaddr
);
824 return std::make_shared
<PageFault
>(vaddr
, true,
828 Addr alignedVaddr
= p
->pTable
->pageAlign(vaddr
);
830 DPRINTF(GPUTLB
, "Mapping %#x to %#x\n",
831 alignedVaddr
, pte
->paddr
);
833 GpuTlbEntry
gpuEntry(
834 p
->pTable
->pid(), alignedVaddr
,
836 entry
= insert(alignedVaddr
, gpuEntry
);
839 DPRINTF(GPUTLB
, "Miss was serviced.\n");
845 latency
= hitLatency
;
849 // Do paging protection checks.
850 bool inUser
= (m5Reg
.cpl
== 3 &&
851 !(flags
& (CPL0FlagBit
<< FlagShift
)));
853 CR0 cr0
= tc
->readMiscRegNoEffect(MISCREG_CR0
);
854 bool badWrite
= (!entry
->writable
&& (inUser
|| cr0
.wp
));
856 if ((inUser
&& !entry
->user
) || (mode
== BaseTLB::Write
&&
858 // The page must have been present to get into the TLB in
859 // the first place. We'll assume the reserved bits are
860 // fine even though we're not checking them.
861 return std::make_shared
<PageFault
>(vaddr
, true, mode
,
865 if (storeCheck
&& badWrite
) {
866 // This would fault if this were a write, so return a page
867 // fault that reflects that happening.
868 return std::make_shared
<PageFault
>(vaddr
, true,
874 DPRINTF(GPUTLB
, "Entry found with paddr %#x, doing protection "
875 "checks.\n", entry
->paddr
);
877 int page_size
= entry
->size();
878 Addr paddr
= entry
->paddr
| (vaddr
& (page_size
- 1));
879 DPRINTF(GPUTLB
, "Translated %#x -> %#x.\n", vaddr
, paddr
);
880 req
->setPaddr(paddr
);
882 if (entry
->uncacheable
)
883 req
->setFlags(Request::UNCACHEABLE
);
885 //Use the address which already has segmentation applied.
886 DPRINTF(GPUTLB
, "Paging disabled.\n");
887 DPRINTF(GPUTLB
, "Translated %#x -> %#x.\n", vaddr
, vaddr
);
888 req
->setPaddr(vaddr
);
892 DPRINTF(GPUTLB
, "In real mode.\n");
893 DPRINTF(GPUTLB
, "Translated %#x -> %#x.\n", vaddr
, vaddr
);
894 req
->setPaddr(vaddr
);
897 // Check for an access to the local APIC
899 LocalApicBase localApicBase
=
900 tc
->readMiscRegNoEffect(MISCREG_APIC_BASE
);
902 Addr baseAddr
= localApicBase
.base
* PageBytes
;
903 Addr paddr
= req
->getPaddr();
905 if (baseAddr
<= paddr
&& baseAddr
+ PageBytes
> paddr
) {
906 // Force the access to be uncacheable.
907 req
->setFlags(Request::UNCACHEABLE
);
908 req
->setPaddr(x86LocalAPICAddress(tc
->contextId(),
// Atomic-mode entry point: forwards to GpuTLB::translate with a null
// Translation object and the timing argument set to false, returning the
// result of translate directly. delayedResponse is a local out-argument
// whose value is not inspected in the visible lines.
// NOTE(review): the tail of the parameter list and the last argument of the
// translate call are missing from this listing (presumably a latency
// reference, as in translateTiming - confirm).
917 GpuTLB::translateAtomic(RequestPtr req
, ThreadContext
*tc
, Mode mode
,
920 bool delayedResponse
;
922 return GpuTLB::translate(req
, tc
, nullptr, mode
, delayedResponse
, false,
// Timing-mode entry point: calls GpuTLB::translate with the timing argument
// set to true and the caller-provided latency reference.
// When translate leaves delayedResponse false, the outcome is delivered
// right away through translation->finish(fault, req, tc, mode).
// Nothing else is done in the visible lines when delayedResponse is true.
927 GpuTLB::translateTiming(RequestPtr req
, ThreadContext
*tc
,
928 Translation
*translation
, Mode mode
, int &latency
)
930 bool delayedResponse
;
933 Fault fault
= GpuTLB::translate(req
, tc
, translation
, mode
,
934 delayedResponse
, true, latency
);
936 if (!delayedResponse
)
937 translation
->finish(fault
, req
, tc
, mode
);
948 GpuTLB::serialize(CheckpointOut
&cp
) const
953 GpuTLB::unserialize(CheckpointIn
&cp
)
960 MemObject::regStats();
963 .name(name() + ".local_TLB_accesses")
964 .desc("Number of TLB accesses")
968 .name(name() + ".local_TLB_hits")
969 .desc("Number of TLB hits")
973 .name(name() + ".local_TLB_misses")
974 .desc("Number of TLB misses")
978 .name(name() + ".local_TLB_miss_rate")
979 .desc("TLB miss rate")
983 .name(name() + ".access_cycles")
984 .desc("Cycles spent accessing this TLB level")
988 .name(name() + ".page_table_cycles")
989 .desc("Cycles spent accessing the page table")
992 localTLBMissRate
= 100 * localNumTLBMisses
/ localNumTLBAccesses
;
995 .name(name() + ".unique_pages")
996 .desc("Number of unique pages touched")
1000 .name(name() + ".local_cycles")
1001 .desc("Number of cycles spent in queue for all incoming reqs")
1005 .name(name() + ".local_latency")
1006 .desc("Avg. latency over incoming coalesced reqs")
1009 localLatency
= localCycles
/ localNumTLBAccesses
;
1011 globalNumTLBAccesses
1012 .name(name() + ".global_TLB_accesses")
1013 .desc("Number of TLB accesses")
1017 .name(name() + ".global_TLB_hits")
1018 .desc("Number of TLB hits")
1022 .name(name() + ".global_TLB_misses")
1023 .desc("Number of TLB misses")
1027 .name(name() + ".global_TLB_miss_rate")
1028 .desc("TLB miss rate")
1031 globalTLBMissRate
= 100 * globalNumTLBMisses
/ globalNumTLBAccesses
;
1034 .name(name() + ".avg_reuse_distance")
1035 .desc("avg. reuse distance over all pages (in ticks)")
1041 * Do the TLB lookup for this coalesced request and schedule
1042 * another event <TLB access latency> cycles later.
// Start a timing-mode lookup for a coalesced translation packet.
// Visible steps:
//  - assert the packet has a senderState and cast it to TranslationState;
//  - round the request vaddr down to its page address;
//  - read req_cnt from the back of reqCnt and pre-charge accessCycles and
//    localCycles by subtracting curTick (translationReturn adds curTick
//    back when the event fires, leaving the elapsed time);
//  - call tlbLookup; on the hit path fetch the entry again with
//    lookup(vaddr, false), store a newly allocated copy in
//    sender_state->tlbEntry and record hitLevel as reqCnt.size();
//  - create a TLBEvent with the outcome, panic if the page address already
//    has one in translationReturnEvent, register it and schedule it at
//    curTick() + ticks(hitLatency).
// NOTE(review): the guards around the statistics updates and the hit/miss
// branch keywords are missing from this listing; presumably they depend on
// update_stats and success respectively - confirm.
1046 GpuTLB::issueTLBLookup(PacketPtr pkt
)
1049 assert(pkt
->senderState
);
1051 Addr virt_page_addr
= roundDown(pkt
->req
->getVaddr(),
1054 TranslationState
*sender_state
=
1055 safe_cast
<TranslationState
*>(pkt
->senderState
);
1057 bool update_stats
= !sender_state
->prefetch
;
1058 ThreadContext
* tmp_tc
= sender_state
->tc
;
1060 DPRINTF(GPUTLB
, "Translation req. for virt. page addr %#x\n",
1063 int req_cnt
= sender_state
->reqCnt
.back();
1066 accessCycles
-= (curTick() * req_cnt
);
1067 localCycles
-= curTick();
1068 updatePageFootprint(virt_page_addr
);
1069 globalNumTLBAccesses
+= req_cnt
;
1072 tlbOutcome lookup_outcome
= TLB_MISS
;
1073 RequestPtr tmp_req
= pkt
->req
;
1075 // Access the TLB and figure out if it's a hit or a miss.
1076 bool success
= tlbLookup(tmp_req
, tmp_tc
, update_stats
);
1079 lookup_outcome
= TLB_HIT
;
1080 // Put the entry in SenderState
1081 GpuTlbEntry
*entry
= lookup(tmp_req
->getVaddr(), false);
1084 sender_state
->tlbEntry
=
1085 new GpuTlbEntry(0, entry
->vaddr
, entry
->paddr
, entry
->valid
);
1088 // the reqCnt has an entry per level, so its size tells us
1089 // which level we are in
1090 sender_state
->hitLevel
= sender_state
->reqCnt
.size();
1091 globalNumTLBHits
+= req_cnt
;
1095 globalNumTLBMisses
+= req_cnt
;
1099 * We now know the TLB lookup outcome (if it's a hit or a miss), as well
1100 * as the TLB access latency.
1102 * We create and schedule a new TLBEvent which will help us take the
1103 * appropriate actions (e.g., update TLB on a hit, send request to lower
1104 * level TLB on a miss, or start a page walk if this was the last-level
1107 TLBEvent
*tlb_event
=
1108 new TLBEvent(this, virt_page_addr
, lookup_outcome
, pkt
);
1110 if (translationReturnEvent
.count(virt_page_addr
)) {
1111 panic("Virtual Page Address %#x already has a return event\n",
1115 translationReturnEvent
[virt_page_addr
] = tlb_event
;
1118 DPRINTF(GPUTLB
, "schedule translationReturnEvent @ curTick %d\n",
1119 curTick() + this->ticks(hitLatency
));
1121 schedule(tlb_event
, curTick() + this->ticks(hitLatency
));
// Construct a TLBEvent with CPU_Tick_Pri priority, recording the owning
// TLB, the virtual page address, the lookup outcome and the packet that
// translationReturn will later receive from process().
// NOTE(review): the line declaring the _pkt parameter and the constructor
// body are missing from this listing.
1124 GpuTLB::TLBEvent::TLBEvent(GpuTLB
* _tlb
, Addr _addr
, tlbOutcome tlb_outcome
,
1126 : Event(CPU_Tick_Pri
), tlb(_tlb
), virtPageAddr(_addr
),
1127 outcome(tlb_outcome
), pkt(_pkt
)
1132 * Do Paging protection checks. If we encounter a page fault, then
1133 * an assertion is fired.
// Evaluate paging protection for tlb_entry against the current privilege
// state read from tc.
//  - inUser: CPL is 3 and the CPL0 flag bit is clear in the request flags.
//  - badWrite: the entry is not writable and either inUser or CR0.wp holds.
// The first condition catches a user access to a non-user page or a write
// that badWrite forbids; the second catches a store-check request on a page
// that would fault if written.
// NOTE(review): the statements inside both if-bodies are missing from this
// listing; the comment ahead of this function says an assertion fires on a
// page fault - confirm against the full file.
1136 GpuTLB::pagingProtectionChecks(ThreadContext
*tc
, PacketPtr pkt
,
1137 GpuTlbEntry
* tlb_entry
, Mode mode
)
1139 HandyM5Reg m5Reg
= tc
->readMiscRegNoEffect(MISCREG_M5_REG
);
1140 uint32_t flags
= pkt
->req
->getFlags();
1141 bool storeCheck
= flags
& (StoreCheck
<< FlagShift
);
1143 // Do paging protection checks.
1144 bool inUser
= (m5Reg
.cpl
== 3 && !(flags
& (CPL0FlagBit
<< FlagShift
)));
1145 CR0 cr0
= tc
->readMiscRegNoEffect(MISCREG_CR0
);
1147 bool badWrite
= (!tlb_entry
->writable
&& (inUser
|| cr0
.wp
));
1149 if ((inUser
&& !tlb_entry
->user
) ||
1150 (mode
== BaseTLB::Write
&& badWrite
)) {
1151 // The page must have been present to get into the TLB in
1152 // the first place. We'll assume the reserved bits are
1153 // fine even though we're not checking them.
1157 if (storeCheck
&& badWrite
) {
1158 // This would fault if this were a write, so return a page
1159 // fault that reflects that happening.
1165 * handleTranslationReturn is called on a TLB hit,
1166 * when a TLB miss returns or when a page fault returns.
1167 * The latter calls this function with TLB_MISS as tlbOutcome.
// Finish a timing-mode translation and send the packet back upstream.
// Visible steps:
//  - pick the entry carried in sender_state->tlbEntry; on the miss path,
//    when allocationPolicy is set, insert it into this TLB under
//    virt_page_addr and use the slot returned by insert instead;
//  - run pagingProtectionChecks on that entry;
//  - form paddr as entry paddr OR-ed with the in-page offset of vaddr
//    (vaddr & (size() - 1));
//  - turn the packet into a timing response if it is still a request, set
//    the physical address and, for uncacheable entries, the UNCACHEABLE
//    request flag;
//  - send the response on cpuSidePort[0], queue virt_page_addr for cleanup
//    and schedule cleanupEvent for the current tick unless it is already
//    scheduled.
// NOTE(review): the pkt parameter line and the else keyword between the hit
// and miss paths are missing from this listing.
1170 GpuTLB::handleTranslationReturn(Addr virt_page_addr
, tlbOutcome tlb_outcome
,
1175 Addr vaddr
= pkt
->req
->getVaddr();
1177 TranslationState
*sender_state
=
1178 safe_cast
<TranslationState
*>(pkt
->senderState
);
1180 ThreadContext
*tc
= sender_state
->tc
;
1181 Mode mode
= sender_state
->tlbMode
;
1183 GpuTlbEntry
*local_entry
, *new_entry
;
1185 if (tlb_outcome
== TLB_HIT
) {
1186 DPRINTF(GPUTLB
, "Translation Done - TLB Hit for addr %#x\n", vaddr
);
1187 local_entry
= sender_state
->tlbEntry
;
1189 DPRINTF(GPUTLB
, "Translation Done - TLB Miss for addr %#x\n",
1192 // We are returning either from a page walk or from a hit at a lower
1193 // TLB level. The senderState should be "carrying" a pointer to the
1194 // correct TLBEntry.
1195 new_entry
= sender_state
->tlbEntry
;
1197 local_entry
= new_entry
;
1199 if (allocationPolicy
) {
1200 DPRINTF(GPUTLB
, "allocating entry w/ addr %#x\n",
1203 local_entry
= insert(virt_page_addr
, *new_entry
);
1206 assert(local_entry
);
1210 * At this point the packet carries an up-to-date tlbEntry pointer
1211 * in its senderState.
1212 * Next step is to do the paging protection checks.
1214 DPRINTF(GPUTLB
, "Entry found with vaddr %#x, doing protection checks "
1215 "while paddr was %#x.\n", local_entry
->vaddr
,
1216 local_entry
->paddr
);
1218 pagingProtectionChecks(tc
, pkt
, local_entry
, mode
);
1219 int page_size
= local_entry
->size();
1220 Addr paddr
= local_entry
->paddr
| (vaddr
& (page_size
- 1));
1221 DPRINTF(GPUTLB
, "Translated %#x -> %#x.\n", vaddr
, paddr
);
1223 // Since this packet will be sent through the cpu side slave port,
1224 // it must be converted to a response pkt if it is not one already
1225 if (pkt
->isRequest()) {
1226 pkt
->makeTimingResponse();
1229 pkt
->req
->setPaddr(paddr
);
1231 if (local_entry
->uncacheable
) {
1232 pkt
->req
->setFlags(Request::UNCACHEABLE
);
1235 //send packet back to coalescer
1236 cpuSidePort
[0]->sendTimingResp(pkt
);
1237 //schedule cleanup event
1238 cleanupQueue
.push(virt_page_addr
);
1240 // schedule this only once per cycle.
1241 // The check is required because we might have multiple translations
1242 // returning the same cycle
1243 // this is a maximum priority event and must be on the same cycle
1244 // as the cleanup event in TLBCoalescer to avoid a race with
1245 // IssueProbeEvent caused by TLBCoalescer::MemSidePort::recvReqRetry
1246 if (!cleanupEvent
.scheduled())
1247 schedule(cleanupEvent
, curTick());
1251 * Here we take the appropriate actions based on the result of the
// Act on a fired TLBEvent according to its outcome.
//  - TLB_HIT: complete through handleTranslationReturn and add curTick back
//    into accessCycles (scaled by req_cnt) and localCycles.
//  - TLB_MISS: add the same cycle accounting, then either forward the
//    packet on memSidePort[0] when hasMemSidePort is set (a rejected send
//    is parked in the port retries list), or treat this as the last level:
//    subtract curTick from pageTableCycles, retag the registered event as
//    PAGE_WALK and reschedule it ticks(missLatency2) later.
//  - PAGE_WALK: add curTick back into pageTableCycles, look vaddr up in the
//    process page table (retrying after fixupStackFault for non-execute
//    accesses), store a new GpuTlbEntry in the sender state - built from
//    the page table entry on one path and as an all-zero invalid entry on
//    the other - and complete through handleTranslationReturn as TLB_MISS.
//  - MISS_RETURN: complete through handleTranslationReturn as TLB_MISS.
// NOTE(review): the pkt parameter line, the guards around the statistics
// updates and several else keywords are missing from this listing;
// update_stats presumably gates the counter updates - confirm.
1255 GpuTLB::translationReturn(Addr virtPageAddr
, tlbOutcome outcome
,
1258 DPRINTF(GPUTLB
, "Triggered TLBEvent for addr %#x\n", virtPageAddr
);
1260 assert(translationReturnEvent
[virtPageAddr
]);
1263 TranslationState
*tmp_sender_state
=
1264 safe_cast
<TranslationState
*>(pkt
->senderState
);
1266 int req_cnt
= tmp_sender_state
->reqCnt
.back();
1267 bool update_stats
= !tmp_sender_state
->prefetch
;
1270 if (outcome
== TLB_HIT
) {
1271 handleTranslationReturn(virtPageAddr
, TLB_HIT
, pkt
);
1274 accessCycles
+= (req_cnt
* curTick());
1275 localCycles
+= curTick();
1278 } else if (outcome
== TLB_MISS
) {
1280 DPRINTF(GPUTLB
, "This is a TLB miss\n");
1282 accessCycles
+= (req_cnt
*curTick());
1283 localCycles
+= curTick();
1286 if (hasMemSidePort
) {
1287 // the one cycle added here represents the delay from when we get
1288 // the reply back till when we propagate it to the coalescer
1291 accessCycles
+= (req_cnt
* 1);
1296 * There is a TLB below. Send the coalesced request.
1297 * We actually send the very first packet of all the
1298 * pending packets for this virtual page address.
1300 if (!memSidePort
[0]->sendTimingReq(pkt
)) {
1301 DPRINTF(GPUTLB
, "Failed sending translation request to "
1302 "lower level TLB for addr %#x\n", virtPageAddr
);
1304 memSidePort
[0]->retries
.push_back(pkt
);
1306 DPRINTF(GPUTLB
, "Sent translation request to lower level "
1307 "TLB for addr %#x\n", virtPageAddr
);
1310 //this is the last level TLB. Start a page walk
1311 DPRINTF(GPUTLB
, "Last level TLB - start a page walk for "
1312 "addr %#x\n", virtPageAddr
);
1315 pageTableCycles
-= (req_cnt
*curTick());
1317 TLBEvent
*tlb_event
= translationReturnEvent
[virtPageAddr
];
1319 tlb_event
->updateOutcome(PAGE_WALK
);
1320 schedule(tlb_event
, curTick() + ticks(missLatency2
));
1322 } else if (outcome
== PAGE_WALK
) {
1324 pageTableCycles
+= (req_cnt
*curTick());
1326 // Need to access the page table and update the TLB
1327 DPRINTF(GPUTLB
, "Doing a page walk for address %#x\n",
1330 TranslationState
*sender_state
=
1331 safe_cast
<TranslationState
*>(pkt
->senderState
);
1333 Process
*p
= sender_state
->tc
->getProcessPtr();
1334 Addr vaddr
= pkt
->req
->getVaddr();
1336 Addr alignedVaddr
= p
->pTable
->pageAlign(vaddr
);
1337 assert(alignedVaddr
== virtPageAddr
);
1339 const EmulationPageTable::Entry
*pte
= p
->pTable
->lookup(vaddr
);
1340 if (!pte
&& sender_state
->tlbMode
!= BaseTLB::Execute
&&
1341 p
->fixupStackFault(vaddr
)) {
1342 pte
= p
->pTable
->lookup(vaddr
);
1346 DPRINTF(GPUTLB
, "Mapping %#x to %#x\n", alignedVaddr
,
1349 sender_state
->tlbEntry
=
1350 new GpuTlbEntry(0, virtPageAddr
, pte
->paddr
, true);
1352 sender_state
->tlbEntry
=
1353 new GpuTlbEntry(0, 0, 0, false);
1356 handleTranslationReturn(virtPageAddr
, TLB_MISS
, pkt
);
1357 } else if (outcome
== MISS_RETURN
) {
1358 /** we add an extra cycle in the return path of the translation
1359 * requests in between the various TLB levels.
1361 handleTranslationReturn(virtPageAddr
, TLB_MISS
, pkt
);
// Event callback: hands the stored page address, outcome and packet to
// translationReturn on the owning TLB.
1368 GpuTLB::TLBEvent::process()
1370 tlb
->translationReturn(virtPageAddr
, outcome
, pkt
);
// Return the fixed, human-readable label for this event type.
1374 GpuTLB::TLBEvent::description() const
1376 return "trigger translationDoneEvent";
// Change the outcome carried by this event; translationReturn calls it with
// PAGE_WALK before rescheduling the event on a last-level miss.
// NOTE(review): the body is missing from this listing; presumably it
// assigns _outcome to the outcome member - confirm.
1380 GpuTLB::TLBEvent::updateOutcome(tlbOutcome _outcome
)
// Accessor for the virtual page address this event was created for.
1386 GpuTLB::TLBEvent::getTLBEventVaddr()
1388 return virtPageAddr
;
1392 * recvTiming receives a coalesced timing request from a TLBCoalescer
1393 * and it calls issueTLBLookup()
1394 * It only rejects the packet if we have exceeded the max
1395 * outstanding number of requests for the TLB
// Receive a coalesced timing translation request.
// While tlb->outstandingReqs is below tlb->maxCoalescedReqs the packet is
// passed to issueTLBLookup and the outstanding counter is incremented.
// Otherwise only a debug message with the current counter is emitted in the
// visible lines.
// NOTE(review): the else keyword and the return statements are missing from
// this listing; presumably true is returned on accept and false on reject -
// confirm.
1398 GpuTLB::CpuSidePort::recvTimingReq(PacketPtr pkt
)
1400 if (tlb
->outstandingReqs
< tlb
->maxCoalescedReqs
) {
1401 tlb
->issueTLBLookup(pkt
);
1402 // update number of outstanding translation requests
1403 tlb
->outstandingReqs
++;
1406 DPRINTF(GPUTLB
, "Reached maxCoalescedReqs number %d\n",
1407 tlb
->outstandingReqs
);
1413 * handleFuncTranslationReturn is called on a TLB hit,
1414 * when a TLB miss returns or when a page fault returns.
1415 * It updates LRU, inserts the TLB entry on a miss
1416 * depending on the allocation policy and does the required
1417 * protection checks. It does NOT create a new packet to
1418 * update the packet's addr; this is done in hsail-gpu code.
// Finish a functional (same-cycle) translation for pkt.
// Visible steps:
//  - pick the entry carried in sender_state->tlbEntry; on the miss path,
//    when allocationPolicy is set, insert it under the page-aligned vaddr
//    and use the slot returned by insert instead;
//  - run pagingProtectionChecks only for non-prefetch requests whose
//    carried entry is marked valid;
//  - form paddr as entry paddr OR-ed with the in-page offset of vaddr,
//    store it in the request and set the UNCACHEABLE flag for uncacheable
//    entries.
// Unlike handleTranslationReturn, no response is sent and no cleanup is
// scheduled in the visible lines.
// NOTE(review): the else keyword between the hit and miss paths and the
// second line of several DPRINTF calls are missing from this listing.
1421 GpuTLB::handleFuncTranslationReturn(PacketPtr pkt
, tlbOutcome tlb_outcome
)
1423 TranslationState
*sender_state
=
1424 safe_cast
<TranslationState
*>(pkt
->senderState
);
1426 ThreadContext
*tc
= sender_state
->tc
;
1427 Mode mode
= sender_state
->tlbMode
;
1428 Addr vaddr
= pkt
->req
->getVaddr();
1430 GpuTlbEntry
*local_entry
, *new_entry
;
1432 if (tlb_outcome
== TLB_HIT
) {
1433 DPRINTF(GPUTLB
, "Functional Translation Done - TLB hit for addr "
1436 local_entry
= sender_state
->tlbEntry
;
1438 DPRINTF(GPUTLB
, "Functional Translation Done - TLB miss for addr "
1441 // We are returning either from a page walk or from a hit at a lower
1442 // TLB level. The senderState should be "carrying" a pointer to the
1443 // correct TLBEntry.
1444 new_entry
= sender_state
->tlbEntry
;
1446 local_entry
= new_entry
;
1448 if (allocationPolicy
) {
1449 Addr virt_page_addr
= roundDown(vaddr
, TheISA::PageBytes
);
1451 DPRINTF(GPUTLB
, "allocating entry w/ addr %#x\n",
1454 local_entry
= insert(virt_page_addr
, *new_entry
);
1457 assert(local_entry
);
1460 DPRINTF(GPUTLB
, "Entry found with vaddr %#x, doing protection checks "
1461 "while paddr was %#x.\n", local_entry
->vaddr
,
1462 local_entry
->paddr
);
1464 // Do paging checks if it's a normal functional access. If it's for a
1465 // prefetch, then sometimes you can try to prefetch something that won't
1466 // pass protection. We don't actually want to fault because there is no
1467 // demand access to deem this a violation. Just put it in the TLB and
1468 // it will fault if indeed a future demand access touches it in
1470 if (!sender_state
->prefetch
&& sender_state
->tlbEntry
->valid
)
1471 pagingProtectionChecks(tc
, pkt
, local_entry
, mode
);
1473 int page_size
= local_entry
->size();
1474 Addr paddr
= local_entry
->paddr
| (vaddr
& (page_size
- 1));
1475 DPRINTF(GPUTLB
, "Translated %#x -> %#x.\n", vaddr
, paddr
);
1477 pkt
->req
->setPaddr(paddr
);
1479 if (local_entry
->uncacheable
)
1480 pkt
->req
->setFlags(Request::UNCACHEABLE
);
1483 // This is used for atomic translations. Need to
1484 // make it all happen during the same cycle.
// Functional translation of pkt: looks the address up in this TLB, on a
// miss either forwards the packet to the TLB below or consults the process
// page table, attaches a freshly allocated GpuTlbEntry to the sender state
// and finally calls handleFuncTranslationReturn().
// NOTE(review): several lines of this definition (conditions, else
// branches, returns, trailing call arguments) are not visible here, so the
// branch structure described by the comments below is partly inferred —
// confirm against the full file.
1486 GpuTLB::CpuSidePort::recvFunctional(PacketPtr pkt
)
1488 TranslationState
*sender_state
=
1489 safe_cast
<TranslationState
*>(pkt
->senderState
);
1491 ThreadContext
*tc
= sender_state
->tc
;
// Prefetches are excluded from statistics.
1492 bool update_stats
= !sender_state
->prefetch
;
1494 Addr virt_page_addr
= roundDown(pkt
->req
->getVaddr(),
1498 tlb
->updatePageFootprint(virt_page_addr
);
1500 // do the TLB lookup; update_stats (false only for prefetches) is passed
// through to tlbLookup().
// NOTE(review): the previous wording "without updating the stats" does not
// match the argument passed — confirm what the third parameter controls.
1501 bool success
= tlb
->tlbLookup(pkt
->req
, tc
, update_stats
);
1502 tlbOutcome tlb_outcome
= success
? TLB_HIT
: TLB_MISS
;
1504 // functional mode means no coalescing
1505 // global metrics are the same as the local metrics
1507 tlb
->globalNumTLBAccesses
++;
// hitLevel is taken from the current length of reqCnt.
1510 sender_state
->hitLevel
= sender_state
->reqCnt
.size();
1511 tlb
->globalNumTLBHits
++;
1517 tlb
->globalNumTLBMisses
++;
1518 if (tlb
->hasMemSidePort
) {
1519 // there is a TLB below -> propagate down the TLB hierarchy
1520 tlb
->memSidePort
[0]->sendFunctional(pkt
);
1521 // If no valid translation from a prefetch, then just return
1522 if (sender_state
->prefetch
&& !pkt
->req
->hasPaddr())
1525 // Need to access the page table and update the TLB
1526 DPRINTF(GPUTLB
, "Doing a page walk for address %#x\n",
1529 Process
*p
= tc
->getProcessPtr();
1531 Addr vaddr
= pkt
->req
->getVaddr();
// Sanity check: the page table's alignment must agree with roundDown().
1533 Addr alignedVaddr
= p
->pTable
->pageAlign(vaddr
);
1534 assert(alignedVaddr
== virt_page_addr
);
// First lookup; if it fails for a non-execute access, fixupStackFault() is
// tried and, when it reports success, the lookup is repeated.
1537 const EmulationPageTable::Entry
*pte
=
1538 p
->pTable
->lookup(vaddr
);
1539 if (!pte
&& sender_state
->tlbMode
!= BaseTLB::Execute
&&
1540 p
->fixupStackFault(vaddr
)) {
1541 pte
= p
->pTable
->lookup(vaddr
);
1544 if (!sender_state
->prefetch
) {
1545 // no PageFaults are permitted after
1546 // the second page table lookup
1549 DPRINTF(GPUTLB
, "Mapping %#x to %#x\n", alignedVaddr
,
// NOTE(review): the entry is allocated with new and handed to the sender
// state; where it is released is not visible in this function.
1552 sender_state
->tlbEntry
=
1553 new GpuTlbEntry(0, virt_page_addr
,
1556 // If this was a prefetch, then do the normal thing if it
1557 // was a successful translation. Otherwise, send an empty
1558 // TLB entry back so that it can be figured out as empty and
1559 // handled accordingly.
1561 DPRINTF(GPUTLB
, "Mapping %#x to %#x\n", alignedVaddr
,
1564 sender_state
->tlbEntry
=
1565 new GpuTlbEntry(0, virt_page_addr
,
1568 DPRINTF(GPUPrefetch
, "Prefetch failed %#x\n",
// Failed prefetch: a default-constructed entry is attached instead.
1571 sender_state
->tlbEntry
= new GpuTlbEntry();
// NOTE(review): the value formatted with %#x is the result of
// tlb->lookup(), which is assigned to a GpuTlbEntry* just below, so this
// trace looks like it prints an entry pointer rather than the vaddr —
// confirm.
1578 DPRINTF(GPUPrefetch
, "Functional Hit for vaddr %#x\n",
1579 tlb
->lookup(pkt
->req
->getVaddr()));
1581 GpuTlbEntry
*entry
= tlb
->lookup(pkt
->req
->getVaddr(),
// The sender state gets its own copy of the hit entry's vaddr, paddr and
// valid fields rather than a pointer into the TLB.
1586 sender_state
->tlbEntry
=
1587 new GpuTlbEntry(0, entry
->vaddr
, entry
->paddr
, entry
->valid
);
1589 // This is the function that would populate pkt->req with the paddr of
1590 // the translation. But if no translation happens (i.e. prefetch fails)
1591 // then the early returns in the above code will keep this function
1593 tlb
->handleFuncTranslationReturn(pkt
, tlb_outcome
);
// Retry hook on the CPU-side port; per the comment below it is not expected
// to be reached.
// NOTE(review): the remainder of the body is not visible here.
1597 GpuTLB::CpuSidePort::recvReqRetry()
1599 // The CPUSidePort never sends anything but replies. No retries
// Declares a default-constructed AddrRangeList; nothing is added to it in
// the visible lines.
// NOTE(review): the return statement is not visible here; presumably the
// empty list is returned — confirm.
1605 GpuTLB::CpuSidePort::getAddrRanges() const
1607 // currently not checked by the master
1608 AddrRangeList ranges
;
1614 * MemSidePort receives the packet back.
1615 * We need to call the handleTranslationReturn
1616 * and propagate up the hierarchy.
// Handles the response coming back on the memory-side port: finds the
// TLBEvent registered for the packet's page, marks it MISS_RETURN and
// schedules it tlb->ticks(1) after the current tick.
// NOTE(review): the return statement is not visible here.
1619 GpuTLB::MemSidePort::recvTimingResp(PacketPtr pkt
)
1621 Addr virt_page_addr
= roundDown(pkt
->req
->getVaddr(),
1624 DPRINTF(GPUTLB
, "MemSidePort recvTiming for virt_page_addr %#x\n",
// NOTE(review): operator[] is used for the lookup and the result is
// dereferenced in the assert below. If translationReturnEvent is a
// std::map-like container, a missing key would yield a null pointer here —
// confirm an entry always exists for a returning page.
1627 TLBEvent
*tlb_event
= tlb
->translationReturnEvent
[virt_page_addr
];
1629 assert(virt_page_addr
== tlb_event
->getTLBEventVaddr());
1631 tlb_event
->updateOutcome(MISS_RETURN
);
1632 tlb
->schedule(tlb_event
, curTick()+tlb
->ticks(1));
// Retry hook on the memory-side port; per the comment below it is not
// expected to be reached.
// NOTE(review): the remainder of the body is not visible here.
1638 GpuTLB::MemSidePort::recvReqRetry()
1640 // No retries should reach the TLB. The retries
1641 // should only reach the TLBCoalescer.
// Drains cleanupQueue: for every queued page address the TLBEvent stored in
// translationReturnEvent is deleted and its table slot erased; afterwards
// every CPU-side port is asked to retry.
// NOTE(review): the signature line of this definition is not visible here.
// It accesses cleanupQueue / translationReturnEvent / cpuSidePort without a
// tlb-> prefix, so it is presumably a GpuTLB member — confirm.
// NOTE(review): no statement removing the front element is visible inside
// the loop; the loop only terminates if one exists — confirm.
1648 while (!cleanupQueue
.empty()) {
1649 Addr cleanup_addr
= cleanupQueue
.front();
// Free the event first, then drop the now-dangling pointer from the table.
1653 TLBEvent
* old_tlb_event
= translationReturnEvent
[cleanup_addr
];
1654 delete old_tlb_event
;
1655 translationReturnEvent
.erase(cleanup_addr
);
1657 // update number of outstanding requests
1661 /** the higher level coalescer should retry if it has
1662 * any pending requests.
// NOTE(review): i is a signed int compared against size(); if size()
// returns an unsigned type this is a mixed-sign comparison.
1664 for (int i
= 0; i
< cpuSidePort
.size(); ++i
) {
1665 cpuSidePort
[i
]->sendRetryReq();
// Records one access to the page at virt_page_addr in TLBFootprint:
// creates a zeroed AccessInfo on first touch, otherwise accumulates the
// tick distance since the previous access; then bumps the per-page access
// count and timestamp. With accessDistance set, the current value of
// localNumTLBAccesses is also appended to the page's localTLBAccesses.
1670 GpuTLB::updatePageFootprint(Addr virt_page_addr
)
1673 std::pair
<AccessPatternTable::iterator
, bool> ret
;
// Template record used only if the page is not in the table yet.
1675 AccessInfo tmp_access_info
;
1676 tmp_access_info
.lastTimeAccessed
= 0;
1677 tmp_access_info
.accessesPerPage
= 0;
1678 tmp_access_info
.totalReuseDistance
= 0;
1679 tmp_access_info
.sumDistance
= 0;
1680 tmp_access_info
.meanDistance
= 0;
// insert() result: ret.first points at the page's record (new or
// existing), ret.second is read below as "this was the first access".
1682 ret
= TLBFootprint
.insert(AccessPatternTable::value_type(virt_page_addr
,
1685 bool first_page_access
= ret
.second
;
// NOTE(review): the body of the first-access branch is not visible here.
1687 if (first_page_access
) {
// NOTE(review): the tick difference is stored in an int; if curTick()
// returns a wider unsigned tick type, large reuse distances are truncated
// — confirm the intended width.
1690 int accessed_before
;
1691 accessed_before
= curTick() - ret
.first
->second
.lastTimeAccessed
;
1692 ret
.first
->second
.totalReuseDistance
+= accessed_before
;
// Done for first and repeated accesses alike.
1695 ret
.first
->second
.accessesPerPage
++;
1696 ret
.first
->second
.lastTimeAccessed
= curTick();
1698 if (accessDistance
) {
1699 ret
.first
->second
.localTLBAccesses
1700 .push_back(localNumTLBAccesses
.value());
// End-of-run bookkeeping over TLBFootprint: sums the per-page average
// reuse distance and, with accessDistance set, turns each page's recorded
// localTLBAccesses samples into access distances, derives max / mean /
// stddev per page and writes one CSV row per page. The table is cleared at
// the end.
// NOTE(review): several lines of this definition are not visible here
// (among them the declaration of diff and the targets of two
// assignments) — confirm against the full file.
1705 GpuTLB::exitCallback()
1707 std::ostream
*page_stat_file
= nullptr;
1709 if (accessDistance
) {
1711 // print per page statistics to a separate file (.csv format)
1712 // simout is the gem5 output directory (default is m5out or the one
1713 // specified with -d)
1714 page_stat_file
= simout
.create(name().c_str())->stream();
// NOTE(review): the header has a space after the third comma
// ("mean_access_distance, stddev_distance") while the data rows below use
// "," without a space.
1717 *page_stat_file
<< "page,max_access_distance,mean_access_distance, "
1718 << "stddev_distance" << std::endl
;
1721 // update avg. reuse distance footprint
// NOTE(review): iter_begin and iter_end are not used in the visible lines.
1722 AccessPatternTable::iterator iter
, iter_begin
, iter_end
;
1723 unsigned int sum_avg_reuse_distance_per_page
= 0;
1725 // iterate through all pages seen by this TLB
1726 for (iter
= TLBFootprint
.begin(); iter
!= TLBFootprint
.end(); iter
++) {
// Per-page average reuse distance; relies on accessesPerPage being
// non-zero for every stored page (updatePageFootprint increments it on
// each access).
1727 sum_avg_reuse_distance_per_page
+= iter
->second
.totalReuseDistance
/
1728 iter
->second
.accessesPerPage
;
1730 if (accessDistance
) {
// Element 0 is read unconditionally; this relies on updatePageFootprint
// having pushed at least one sample for the page under the same
// accessDistance flag.
1731 unsigned int tmp
= iter
->second
.localTLBAccesses
[0];
1732 unsigned int prev
= tmp
;
// NOTE(review): signed int index compared against size().
1734 for (int i
= 0; i
< iter
->second
.localTLBAccesses
.size(); ++i
) {
// prev keeps the raw sample before it is overwritten with a distance.
1739 prev
= iter
->second
.localTLBAccesses
[i
];
1740 // update the localTLBAccesses value
1741 // with the actual difference
1742 iter
->second
.localTLBAccesses
[i
] -= tmp
;
1743 // compute the sum of AccessDistance per page
1744 // used later for mean
1745 iter
->second
.sumDistance
+=
1746 iter
->second
.localTLBAccesses
[i
];
1749 iter
->second
.meanDistance
=
1750 iter
->second
.sumDistance
/ iter
->second
.accessesPerPage
;
1752 // compute std_dev and max (we need a second round because we
1753 // need to know the mean value)
1754 unsigned int max_distance
= 0;
// NOTE(review): the squared deviations are accumulated in an unsigned int,
// so every pow() result is converted back to an integer on each addition.
1755 unsigned int stddev_distance
= 0;
1757 for (int i
= 0; i
< iter
->second
.localTLBAccesses
.size(); ++i
) {
1758 unsigned int tmp_access_distance
=
1759 iter
->second
.localTLBAccesses
[i
];
1761 if (tmp_access_distance
> max_distance
) {
1762 max_distance
= tmp_access_distance
;
// NOTE(review): the declaration of diff is not visible here; if it is
// unsigned, a distance below the mean wraps around before being squared —
// confirm.
1766 tmp_access_distance
- iter
->second
.meanDistance
;
1767 stddev_distance
+= pow(diff
, 2);
// The division is evaluated before sqrt(); with integral operands it
// truncates.
1772 sqrt(stddev_distance
/iter
->second
.accessesPerPage
);
// Row layout: page address in hex, then max, mean and stddev in decimal.
1774 if (page_stat_file
) {
1775 *page_stat_file
<< std::hex
<< iter
->first
<< ",";
1776 *page_stat_file
<< std::dec
<< max_distance
<< ",";
1777 *page_stat_file
<< std::dec
<< iter
->second
.meanDistance
1779 *page_stat_file
<< std::dec
<< stddev_distance
;
1780 *page_stat_file
<< std::endl
;
1783 // erase the localTLBAccesses array
1784 iter
->second
.localTLBAccesses
.clear();
// Guard against dividing by zero when no page was ever recorded.
1788 if (!TLBFootprint
.empty()) {
1790 sum_avg_reuse_distance_per_page
/ TLBFootprint
.size();
1793 //clear the TLBFootprint map
1794 TLBFootprint
.clear();
1796 } // namespace X86ISA
// Factory on the params object: allocates an X86ISA::GpuTLB with new,
// passing this params object to its constructor, and returns the pointer.
// NOTE(review): who releases the returned object is not visible here.
1799 X86GPUTLBParams::create()
1801 return new X86ISA::GpuTLB(this);