2 * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
5 * For use for simulation and test purposes only
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
10 * 1. Redistributions of source code must retain the above copyright notice,
11 * this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright notice,
14 * this list of conditions and the following disclaimer in the documentation
15 * and/or other materials provided with the distribution.
17 * 3. Neither the name of the copyright holder nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
36 #include "gpu-compute/gpu_tlb.hh"
41 #include "arch/x86/faults.hh"
42 #include "arch/x86/insts/microldstop.hh"
43 #include "arch/x86/pagetable.hh"
44 #include "arch/x86/pagetable_walker.hh"
45 #include "arch/x86/regs/misc.hh"
46 #include "arch/x86/x86_traits.hh"
47 #include "base/bitfield.hh"
48 #include "base/logging.hh"
49 #include "base/output.hh"
50 #include "base/trace.hh"
51 #include "cpu/base.hh"
52 #include "cpu/thread_context.hh"
53 #include "debug/GPUPrefetch.hh"
54 #include "debug/GPUTLB.hh"
55 #include "mem/packet_access.hh"
56 #include "mem/page_table.hh"
57 #include "mem/request.hh"
58 #include "sim/process.hh"
63 GpuTLB::GpuTLB(const Params
*p
)
64 : ClockedObject(p
), configAddress(0), size(p
->size
),
65 cleanupEvent([this]{ cleanup(); }, name(), false,
67 exitEvent([this]{ exitCallback(); }, name())
70 assert(assoc
<= size
);
72 allocationPolicy
= p
->allocationPolicy
;
73 hasMemSidePort
= false;
74 accessDistance
= p
->accessDistance
;
75 clock
= p
->clk_domain
->clockPeriod();
77 tlb
.assign(size
, TlbEntry());
79 freeList
.resize(numSets
);
80 entryList
.resize(numSets
);
82 for (int set
= 0; set
< numSets
; ++set
) {
83 for (int way
= 0; way
< assoc
; ++way
) {
84 int x
= set
* assoc
+ way
;
85 freeList
[set
].push_back(&tlb
.at(x
));
92 * @warning: the set-associative version assumes you have a
93 * fixed page size of 4KB.
94 * If the page size is greather than 4KB (as defined in the
95 * TheISA::PageBytes), then there are various issues w/ the current
96 * implementation (you'd have the same 8KB page being replicated in
99 setMask
= numSets
- 1;
101 maxCoalescedReqs
= p
->maxOutstandingReqs
;
103 // Do not allow maxCoalescedReqs to be more than the TLB associativity
104 if (maxCoalescedReqs
> assoc
) {
105 maxCoalescedReqs
= assoc
;
106 cprintf("Forcing maxCoalescedReqs to %d (TLB assoc.) \n", assoc
);
110 hitLatency
= p
->hitLatency
;
111 missLatency1
= p
->missLatency1
;
112 missLatency2
= p
->missLatency2
;
114 // create the slave ports based on the number of connected ports
115 for (size_t i
= 0; i
< p
->port_slave_connection_count
; ++i
) {
116 cpuSidePort
.push_back(new CpuSidePort(csprintf("%s-port%d",
117 name(), i
), this, i
));
120 // create the master ports based on the number of connected ports
121 for (size_t i
= 0; i
< p
->port_master_connection_count
; ++i
) {
122 memSidePort
.push_back(new MemSidePort(csprintf("%s-port%d",
123 name(), i
), this, i
));
127 // fixme: this is never called?
130 // make sure all the hash-maps are empty
131 assert(translationReturnEvent
.empty());
135 GpuTLB::getPort(const std::string
&if_name
, PortID idx
)
137 if (if_name
== "slave") {
138 if (idx
>= static_cast<PortID
>(cpuSidePort
.size())) {
139 panic("TLBCoalescer::getPort: unknown index %d\n", idx
);
142 return *cpuSidePort
[idx
];
143 } else if (if_name
== "master") {
144 if (idx
>= static_cast<PortID
>(memSidePort
.size())) {
145 panic("TLBCoalescer::getPort: unknown index %d\n", idx
);
148 hasMemSidePort
= true;
150 return *memSidePort
[idx
];
152 panic("TLBCoalescer::getPort: unknown port %s\n", if_name
);
157 GpuTLB::insert(Addr vpn
, TlbEntry
&entry
)
159 TlbEntry
*newEntry
= nullptr;
162 * vpn holds the virtual page address
163 * The least significant bits are simply masked
165 int set
= (vpn
>> TheISA::PageShift
) & setMask
;
167 if (!freeList
[set
].empty()) {
168 newEntry
= freeList
[set
].front();
169 freeList
[set
].pop_front();
171 newEntry
= entryList
[set
].back();
172 entryList
[set
].pop_back();
176 newEntry
->vaddr
= vpn
;
177 entryList
[set
].push_front(newEntry
);
182 GpuTLB::EntryList::iterator
183 GpuTLB::lookupIt(Addr va
, bool update_lru
)
185 int set
= (va
>> TheISA::PageShift
) & setMask
;
191 auto entry
= entryList
[set
].begin();
192 for (; entry
!= entryList
[set
].end(); ++entry
) {
193 int page_size
= (*entry
)->size();
195 if ((*entry
)->vaddr
<= va
&& (*entry
)->vaddr
+ page_size
> va
) {
196 DPRINTF(GPUTLB
, "Matched vaddr %#x to entry starting at %#x "
197 "with size %#x.\n", va
, (*entry
)->vaddr
, page_size
);
200 entryList
[set
].push_front(*entry
);
201 entryList
[set
].erase(entry
);
202 entry
= entryList
[set
].begin();
213 GpuTLB::lookup(Addr va
, bool update_lru
)
215 int set
= (va
>> TheISA::PageShift
) & setMask
;
217 auto entry
= lookupIt(va
, update_lru
);
219 if (entry
== entryList
[set
].end())
226 GpuTLB::invalidateAll()
228 DPRINTF(GPUTLB
, "Invalidating all entries.\n");
230 for (int i
= 0; i
< numSets
; ++i
) {
231 while (!entryList
[i
].empty()) {
232 TlbEntry
*entry
= entryList
[i
].front();
233 entryList
[i
].pop_front();
234 freeList
[i
].push_back(entry
);
240 GpuTLB::setConfigAddress(uint32_t addr
)
242 configAddress
= addr
;
246 GpuTLB::invalidateNonGlobal()
248 DPRINTF(GPUTLB
, "Invalidating all non global entries.\n");
250 for (int i
= 0; i
< numSets
; ++i
) {
251 for (auto entryIt
= entryList
[i
].begin();
252 entryIt
!= entryList
[i
].end();) {
253 if (!(*entryIt
)->global
) {
254 freeList
[i
].push_back(*entryIt
);
255 entryList
[i
].erase(entryIt
++);
264 GpuTLB::demapPage(Addr va
, uint64_t asn
)
267 int set
= (va
>> TheISA::PageShift
) & setMask
;
268 auto entry
= lookupIt(va
, false);
270 if (entry
!= entryList
[set
].end()) {
271 freeList
[set
].push_back(*entry
);
272 entryList
[set
].erase(entry
);
277 GpuTLB::translateInt(const RequestPtr
&req
, ThreadContext
*tc
)
279 DPRINTF(GPUTLB
, "Addresses references internal memory.\n");
280 Addr vaddr
= req
->getVaddr();
281 Addr prefix
= (vaddr
>> 3) & IntAddrPrefixMask
;
283 if (prefix
== IntAddrPrefixCPUID
) {
284 panic("CPUID memory space not yet implemented!\n");
285 } else if (prefix
== IntAddrPrefixMSR
) {
287 req
->setFlags(Request::MMAPPED_IPR
);
290 switch (vaddr
& ~IntAddrPrefixMask
) {
292 regNum
= MISCREG_TSC
;
295 regNum
= MISCREG_APIC_BASE
;
298 regNum
= MISCREG_MTRRCAP
;
301 regNum
= MISCREG_SYSENTER_CS
;
304 regNum
= MISCREG_SYSENTER_ESP
;
307 regNum
= MISCREG_SYSENTER_EIP
;
310 regNum
= MISCREG_MCG_CAP
;
313 regNum
= MISCREG_MCG_STATUS
;
316 regNum
= MISCREG_MCG_CTL
;
319 regNum
= MISCREG_DEBUG_CTL_MSR
;
322 regNum
= MISCREG_LAST_BRANCH_FROM_IP
;
325 regNum
= MISCREG_LAST_BRANCH_TO_IP
;
328 regNum
= MISCREG_LAST_EXCEPTION_FROM_IP
;
331 regNum
= MISCREG_LAST_EXCEPTION_TO_IP
;
334 regNum
= MISCREG_MTRR_PHYS_BASE_0
;
337 regNum
= MISCREG_MTRR_PHYS_MASK_0
;
340 regNum
= MISCREG_MTRR_PHYS_BASE_1
;
343 regNum
= MISCREG_MTRR_PHYS_MASK_1
;
346 regNum
= MISCREG_MTRR_PHYS_BASE_2
;
349 regNum
= MISCREG_MTRR_PHYS_MASK_2
;
352 regNum
= MISCREG_MTRR_PHYS_BASE_3
;
355 regNum
= MISCREG_MTRR_PHYS_MASK_3
;
358 regNum
= MISCREG_MTRR_PHYS_BASE_4
;
361 regNum
= MISCREG_MTRR_PHYS_MASK_4
;
364 regNum
= MISCREG_MTRR_PHYS_BASE_5
;
367 regNum
= MISCREG_MTRR_PHYS_MASK_5
;
370 regNum
= MISCREG_MTRR_PHYS_BASE_6
;
373 regNum
= MISCREG_MTRR_PHYS_MASK_6
;
376 regNum
= MISCREG_MTRR_PHYS_BASE_7
;
379 regNum
= MISCREG_MTRR_PHYS_MASK_7
;
382 regNum
= MISCREG_MTRR_FIX_64K_00000
;
385 regNum
= MISCREG_MTRR_FIX_16K_80000
;
388 regNum
= MISCREG_MTRR_FIX_16K_A0000
;
391 regNum
= MISCREG_MTRR_FIX_4K_C0000
;
394 regNum
= MISCREG_MTRR_FIX_4K_C8000
;
397 regNum
= MISCREG_MTRR_FIX_4K_D0000
;
400 regNum
= MISCREG_MTRR_FIX_4K_D8000
;
403 regNum
= MISCREG_MTRR_FIX_4K_E0000
;
406 regNum
= MISCREG_MTRR_FIX_4K_E8000
;
409 regNum
= MISCREG_MTRR_FIX_4K_F0000
;
412 regNum
= MISCREG_MTRR_FIX_4K_F8000
;
415 regNum
= MISCREG_PAT
;
418 regNum
= MISCREG_DEF_TYPE
;
421 regNum
= MISCREG_MC0_CTL
;
424 regNum
= MISCREG_MC1_CTL
;
427 regNum
= MISCREG_MC2_CTL
;
430 regNum
= MISCREG_MC3_CTL
;
433 regNum
= MISCREG_MC4_CTL
;
436 regNum
= MISCREG_MC5_CTL
;
439 regNum
= MISCREG_MC6_CTL
;
442 regNum
= MISCREG_MC7_CTL
;
445 regNum
= MISCREG_MC0_STATUS
;
448 regNum
= MISCREG_MC1_STATUS
;
451 regNum
= MISCREG_MC2_STATUS
;
454 regNum
= MISCREG_MC3_STATUS
;
457 regNum
= MISCREG_MC4_STATUS
;
460 regNum
= MISCREG_MC5_STATUS
;
463 regNum
= MISCREG_MC6_STATUS
;
466 regNum
= MISCREG_MC7_STATUS
;
469 regNum
= MISCREG_MC0_ADDR
;
472 regNum
= MISCREG_MC1_ADDR
;
475 regNum
= MISCREG_MC2_ADDR
;
478 regNum
= MISCREG_MC3_ADDR
;
481 regNum
= MISCREG_MC4_ADDR
;
484 regNum
= MISCREG_MC5_ADDR
;
487 regNum
= MISCREG_MC6_ADDR
;
490 regNum
= MISCREG_MC7_ADDR
;
493 regNum
= MISCREG_MC0_MISC
;
496 regNum
= MISCREG_MC1_MISC
;
499 regNum
= MISCREG_MC2_MISC
;
502 regNum
= MISCREG_MC3_MISC
;
505 regNum
= MISCREG_MC4_MISC
;
508 regNum
= MISCREG_MC5_MISC
;
511 regNum
= MISCREG_MC6_MISC
;
514 regNum
= MISCREG_MC7_MISC
;
517 regNum
= MISCREG_EFER
;
520 regNum
= MISCREG_STAR
;
523 regNum
= MISCREG_LSTAR
;
526 regNum
= MISCREG_CSTAR
;
529 regNum
= MISCREG_SF_MASK
;
532 regNum
= MISCREG_FS_BASE
;
535 regNum
= MISCREG_GS_BASE
;
538 regNum
= MISCREG_KERNEL_GS_BASE
;
541 regNum
= MISCREG_TSC_AUX
;
544 regNum
= MISCREG_PERF_EVT_SEL0
;
547 regNum
= MISCREG_PERF_EVT_SEL1
;
550 regNum
= MISCREG_PERF_EVT_SEL2
;
553 regNum
= MISCREG_PERF_EVT_SEL3
;
556 regNum
= MISCREG_PERF_EVT_CTR0
;
559 regNum
= MISCREG_PERF_EVT_CTR1
;
562 regNum
= MISCREG_PERF_EVT_CTR2
;
565 regNum
= MISCREG_PERF_EVT_CTR3
;
568 regNum
= MISCREG_SYSCFG
;
571 regNum
= MISCREG_IORR_BASE0
;
574 regNum
= MISCREG_IORR_BASE1
;
577 regNum
= MISCREG_IORR_MASK0
;
580 regNum
= MISCREG_IORR_MASK1
;
583 regNum
= MISCREG_TOP_MEM
;
586 regNum
= MISCREG_TOP_MEM2
;
589 regNum
= MISCREG_VM_CR
;
592 regNum
= MISCREG_IGNNE
;
595 regNum
= MISCREG_SMM_CTL
;
598 regNum
= MISCREG_VM_HSAVE_PA
;
601 return std::make_shared
<GeneralProtection
>(0);
603 //The index is multiplied by the size of a MiscReg so that
604 //any memory dependence calculations will not see these as
606 req
->setPaddr(regNum
* sizeof(RegVal
));
608 } else if (prefix
== IntAddrPrefixIO
) {
609 // TODO If CPL > IOPL or in virtual mode, check the I/O permission
610 // bitmap in the TSS.
612 Addr IOPort
= vaddr
& ~IntAddrPrefixMask
;
613 // Make sure the address fits in the expected 16 bit IO address
615 assert(!(IOPort
& ~0xFFFF));
617 if (IOPort
== 0xCF8 && req
->getSize() == 4) {
618 req
->setFlags(Request::MMAPPED_IPR
);
619 req
->setPaddr(MISCREG_PCI_CONFIG_ADDRESS
* sizeof(RegVal
));
620 } else if ((IOPort
& ~mask(2)) == 0xCFC) {
621 req
->setFlags(Request::UNCACHEABLE
);
624 tc
->readMiscRegNoEffect(MISCREG_PCI_CONFIG_ADDRESS
);
626 if (bits(configAddress
, 31, 31)) {
627 req
->setPaddr(PhysAddrPrefixPciConfig
|
628 mbits(configAddress
, 30, 2) |
631 req
->setPaddr(PhysAddrPrefixIO
| IOPort
);
634 req
->setFlags(Request::UNCACHEABLE
);
635 req
->setPaddr(PhysAddrPrefixIO
| IOPort
);
639 panic("Access to unrecognized internal address space %#x.\n",
645 * TLB_lookup will only perform a TLB lookup returning true on a TLB hit
646 * and false on a TLB miss.
647 * Many of the checks about different modes have been converted to
648 * assertions, since these parts of the code are not really used.
649 * On a hit it will update the LRU stack.
652 GpuTLB::tlbLookup(const RequestPtr
&req
,
653 ThreadContext
*tc
, bool update_stats
)
655 bool tlb_hit
= false;
657 uint32_t flags
= req
->getFlags();
658 int seg
= flags
& SegmentFlagMask
;
661 assert(seg
!= SEGMENT_REG_MS
);
662 Addr vaddr
= req
->getVaddr();
663 DPRINTF(GPUTLB
, "TLB Lookup for vaddr %#x.\n", vaddr
);
664 HandyM5Reg m5Reg
= tc
->readMiscRegNoEffect(MISCREG_M5_REG
);
667 DPRINTF(GPUTLB
, "In protected mode.\n");
668 // make sure we are in 64-bit mode
669 assert(m5Reg
.mode
== LongMode
);
671 // If paging is enabled, do the translation.
673 DPRINTF(GPUTLB
, "Paging enabled.\n");
674 //update LRU stack on a hit
675 TlbEntry
*entry
= lookup(vaddr
, true);
681 // functional tlb access for memory initialization
682 // i.e., memory seeding or instr. seeding -> don't update
687 localNumTLBAccesses
++;
701 GpuTLB::translate(const RequestPtr
&req
, ThreadContext
*tc
,
702 Translation
*translation
, Mode mode
,
703 bool &delayedResponse
, bool timing
, int &latency
)
705 uint32_t flags
= req
->getFlags();
706 int seg
= flags
& SegmentFlagMask
;
707 bool storeCheck
= flags
& (StoreCheck
<< FlagShift
);
709 // If this is true, we're dealing with a request
710 // to a non-memory address space.
711 if (seg
== SEGMENT_REG_MS
) {
712 return translateInt(req
, tc
);
715 delayedResponse
= false;
716 Addr vaddr
= req
->getVaddr();
717 DPRINTF(GPUTLB
, "Translating vaddr %#x.\n", vaddr
);
719 HandyM5Reg m5Reg
= tc
->readMiscRegNoEffect(MISCREG_M5_REG
);
721 // If protected mode has been enabled...
723 DPRINTF(GPUTLB
, "In protected mode.\n");
724 // If we're not in 64-bit mode, do protection/limit checks
725 if (m5Reg
.mode
!= LongMode
) {
726 DPRINTF(GPUTLB
, "Not in long mode. Checking segment "
729 // Check for a null segment selector.
730 if (!(seg
== SEGMENT_REG_TSG
|| seg
== SYS_SEGMENT_REG_IDTR
||
731 seg
== SEGMENT_REG_HS
|| seg
== SEGMENT_REG_LS
)
732 && !tc
->readMiscRegNoEffect(MISCREG_SEG_SEL(seg
))) {
733 return std::make_shared
<GeneralProtection
>(0);
736 bool expandDown
= false;
737 SegAttr attr
= tc
->readMiscRegNoEffect(MISCREG_SEG_ATTR(seg
));
739 if (seg
>= SEGMENT_REG_ES
&& seg
<= SEGMENT_REG_HS
) {
740 if (!attr
.writable
&& (mode
== BaseTLB::Write
||
742 return std::make_shared
<GeneralProtection
>(0);
744 if (!attr
.readable
&& mode
== BaseTLB::Read
)
745 return std::make_shared
<GeneralProtection
>(0);
747 expandDown
= attr
.expandDown
;
751 Addr base
= tc
->readMiscRegNoEffect(MISCREG_SEG_BASE(seg
));
752 Addr limit
= tc
->readMiscRegNoEffect(MISCREG_SEG_LIMIT(seg
));
753 // This assumes we're not in 64 bit mode. If we were, the
754 // default address size is 64 bits, overridable to 32.
756 bool sizeOverride
= (flags
& (AddrSizeFlagBit
<< FlagShift
));
757 SegAttr csAttr
= tc
->readMiscRegNoEffect(MISCREG_CS_ATTR
);
759 if ((csAttr
.defaultSize
&& sizeOverride
) ||
760 (!csAttr
.defaultSize
&& !sizeOverride
)) {
764 Addr offset
= bits(vaddr
- base
, size
- 1, 0);
765 Addr endOffset
= offset
+ req
->getSize() - 1;
768 DPRINTF(GPUTLB
, "Checking an expand down segment.\n");
769 warn_once("Expand down segments are untested.\n");
771 if (offset
<= limit
|| endOffset
<= limit
)
772 return std::make_shared
<GeneralProtection
>(0);
774 if (offset
> limit
|| endOffset
> limit
)
775 return std::make_shared
<GeneralProtection
>(0);
779 // If paging is enabled, do the translation.
781 DPRINTF(GPUTLB
, "Paging enabled.\n");
782 // The vaddr already has the segment base applied.
783 TlbEntry
*entry
= lookup(vaddr
);
784 localNumTLBAccesses
++;
789 latency
= missLatency1
;
793 fatal("GpuTLB doesn't support full-system mode\n");
795 DPRINTF(GPUTLB
, "Handling a TLB miss for address %#x "
796 "at pc %#x.\n", vaddr
, tc
->instAddr());
798 Process
*p
= tc
->getProcessPtr();
799 const EmulationPageTable::Entry
*pte
=
800 p
->pTable
->lookup(vaddr
);
802 if (!pte
&& mode
!= BaseTLB::Execute
) {
803 // penalize a "page fault" more
805 latency
+= missLatency2
;
807 if (p
->fixupStackFault(vaddr
))
808 pte
= p
->pTable
->lookup(vaddr
);
812 return std::make_shared
<PageFault
>(vaddr
, true,
816 Addr alignedVaddr
= p
->pTable
->pageAlign(vaddr
);
818 DPRINTF(GPUTLB
, "Mapping %#x to %#x\n",
819 alignedVaddr
, pte
->paddr
);
821 TlbEntry
gpuEntry(p
->pid(), alignedVaddr
,
822 pte
->paddr
, false, false);
823 entry
= insert(alignedVaddr
, gpuEntry
);
826 DPRINTF(GPUTLB
, "Miss was serviced.\n");
832 latency
= hitLatency
;
836 // Do paging protection checks.
837 bool inUser
= (m5Reg
.cpl
== 3 &&
838 !(flags
& (CPL0FlagBit
<< FlagShift
)));
840 CR0 cr0
= tc
->readMiscRegNoEffect(MISCREG_CR0
);
841 bool badWrite
= (!entry
->writable
&& (inUser
|| cr0
.wp
));
843 if ((inUser
&& !entry
->user
) || (mode
== BaseTLB::Write
&&
845 // The page must have been present to get into the TLB in
846 // the first place. We'll assume the reserved bits are
847 // fine even though we're not checking them.
848 return std::make_shared
<PageFault
>(vaddr
, true, mode
,
852 if (storeCheck
&& badWrite
) {
853 // This would fault if this were a write, so return a page
854 // fault that reflects that happening.
855 return std::make_shared
<PageFault
>(vaddr
, true,
861 DPRINTF(GPUTLB
, "Entry found with paddr %#x, doing protection "
862 "checks.\n", entry
->paddr
);
864 int page_size
= entry
->size();
865 Addr paddr
= entry
->paddr
| (vaddr
& (page_size
- 1));
866 DPRINTF(GPUTLB
, "Translated %#x -> %#x.\n", vaddr
, paddr
);
867 req
->setPaddr(paddr
);
869 if (entry
->uncacheable
)
870 req
->setFlags(Request::UNCACHEABLE
);
872 //Use the address which already has segmentation applied.
873 DPRINTF(GPUTLB
, "Paging disabled.\n");
874 DPRINTF(GPUTLB
, "Translated %#x -> %#x.\n", vaddr
, vaddr
);
875 req
->setPaddr(vaddr
);
879 DPRINTF(GPUTLB
, "In real mode.\n");
880 DPRINTF(GPUTLB
, "Translated %#x -> %#x.\n", vaddr
, vaddr
);
881 req
->setPaddr(vaddr
);
884 // Check for an access to the local APIC
886 LocalApicBase localApicBase
=
887 tc
->readMiscRegNoEffect(MISCREG_APIC_BASE
);
889 Addr baseAddr
= localApicBase
.base
* PageBytes
;
890 Addr paddr
= req
->getPaddr();
892 if (baseAddr
<= paddr
&& baseAddr
+ PageBytes
> paddr
) {
893 // Force the access to be uncacheable.
894 req
->setFlags(Request::UNCACHEABLE
);
895 req
->setPaddr(x86LocalAPICAddress(tc
->contextId(),
904 GpuTLB::translateAtomic(const RequestPtr
&req
, ThreadContext
*tc
,
905 Mode mode
, int &latency
)
907 bool delayedResponse
;
909 return GpuTLB::translate(req
, tc
, nullptr, mode
, delayedResponse
, false,
914 GpuTLB::translateTiming(const RequestPtr
&req
, ThreadContext
*tc
,
915 Translation
*translation
, Mode mode
, int &latency
)
917 bool delayedResponse
;
920 Fault fault
= GpuTLB::translate(req
, tc
, translation
, mode
,
921 delayedResponse
, true, latency
);
923 if (!delayedResponse
)
924 translation
->finish(fault
, req
, tc
, mode
);
935 GpuTLB::serialize(CheckpointOut
&cp
) const
940 GpuTLB::unserialize(CheckpointIn
&cp
)
947 ClockedObject::regStats();
950 .name(name() + ".local_TLB_accesses")
951 .desc("Number of TLB accesses")
955 .name(name() + ".local_TLB_hits")
956 .desc("Number of TLB hits")
960 .name(name() + ".local_TLB_misses")
961 .desc("Number of TLB misses")
965 .name(name() + ".local_TLB_miss_rate")
966 .desc("TLB miss rate")
970 .name(name() + ".access_cycles")
971 .desc("Cycles spent accessing this TLB level")
975 .name(name() + ".page_table_cycles")
976 .desc("Cycles spent accessing the page table")
979 localTLBMissRate
= 100 * localNumTLBMisses
/ localNumTLBAccesses
;
982 .name(name() + ".unique_pages")
983 .desc("Number of unique pages touched")
987 .name(name() + ".local_cycles")
988 .desc("Number of cycles spent in queue for all incoming reqs")
992 .name(name() + ".local_latency")
993 .desc("Avg. latency over incoming coalesced reqs")
996 localLatency
= localCycles
/ localNumTLBAccesses
;
999 .name(name() + ".global_TLB_accesses")
1000 .desc("Number of TLB accesses")
1004 .name(name() + ".global_TLB_hits")
1005 .desc("Number of TLB hits")
1009 .name(name() + ".global_TLB_misses")
1010 .desc("Number of TLB misses")
1014 .name(name() + ".global_TLB_miss_rate")
1015 .desc("TLB miss rate")
1018 globalTLBMissRate
= 100 * globalNumTLBMisses
/ globalNumTLBAccesses
;
1021 .name(name() + ".avg_reuse_distance")
1022 .desc("avg. reuse distance over all pages (in ticks)")
1028 * Do the TLB lookup for this coalesced request and schedule
1029 * another event <TLB access latency> cycles later.
1033 GpuTLB::issueTLBLookup(PacketPtr pkt
)
1036 assert(pkt
->senderState
);
1038 Addr virt_page_addr
= roundDown(pkt
->req
->getVaddr(),
1041 TranslationState
*sender_state
=
1042 safe_cast
<TranslationState
*>(pkt
->senderState
);
1044 bool update_stats
= !sender_state
->prefetch
;
1045 ThreadContext
* tmp_tc
= sender_state
->tc
;
1047 DPRINTF(GPUTLB
, "Translation req. for virt. page addr %#x\n",
1050 int req_cnt
= sender_state
->reqCnt
.back();
1053 accessCycles
-= (curTick() * req_cnt
);
1054 localCycles
-= curTick();
1055 updatePageFootprint(virt_page_addr
);
1056 globalNumTLBAccesses
+= req_cnt
;
1059 tlbOutcome lookup_outcome
= TLB_MISS
;
1060 const RequestPtr
&tmp_req
= pkt
->req
;
1062 // Access the TLB and figure out if it's a hit or a miss.
1063 bool success
= tlbLookup(tmp_req
, tmp_tc
, update_stats
);
1066 lookup_outcome
= TLB_HIT
;
1067 // Put the entry in SenderState
1068 TlbEntry
*entry
= lookup(tmp_req
->getVaddr(), false);
1071 auto p
= sender_state
->tc
->getProcessPtr();
1072 sender_state
->tlbEntry
=
1073 new TlbEntry(p
->pid(), entry
->vaddr
, entry
->paddr
,
1077 // the reqCnt has an entry per level, so its size tells us
1078 // which level we are in
1079 sender_state
->hitLevel
= sender_state
->reqCnt
.size();
1080 globalNumTLBHits
+= req_cnt
;
1084 globalNumTLBMisses
+= req_cnt
;
1088 * We now know the TLB lookup outcome (if it's a hit or a miss), as well
1089 * as the TLB access latency.
1091 * We create and schedule a new TLBEvent which will help us take the
1092 * appropriate actions (e.g., update TLB on a hit, send request to lower
1093 * level TLB on a miss, or start a page walk if this was the last-level
1096 TLBEvent
*tlb_event
=
1097 new TLBEvent(this, virt_page_addr
, lookup_outcome
, pkt
);
1099 if (translationReturnEvent
.count(virt_page_addr
)) {
1100 panic("Virtual Page Address %#x already has a return event\n",
1104 translationReturnEvent
[virt_page_addr
] = tlb_event
;
1107 DPRINTF(GPUTLB
, "schedule translationReturnEvent @ curTick %d\n",
1108 curTick() + this->ticks(hitLatency
));
1110 schedule(tlb_event
, curTick() + this->ticks(hitLatency
));
1113 GpuTLB::TLBEvent::TLBEvent(GpuTLB
* _tlb
, Addr _addr
, tlbOutcome tlb_outcome
,
1115 : Event(CPU_Tick_Pri
), tlb(_tlb
), virtPageAddr(_addr
),
1116 outcome(tlb_outcome
), pkt(_pkt
)
1121 * Do Paging protection checks. If we encounter a page fault, then
1122 * an assertion is fired.
1125 GpuTLB::pagingProtectionChecks(ThreadContext
*tc
, PacketPtr pkt
,
1126 TlbEntry
* tlb_entry
, Mode mode
)
1128 HandyM5Reg m5Reg
= tc
->readMiscRegNoEffect(MISCREG_M5_REG
);
1129 uint32_t flags
= pkt
->req
->getFlags();
1130 bool storeCheck
= flags
& (StoreCheck
<< FlagShift
);
1132 // Do paging protection checks.
1133 bool inUser
= (m5Reg
.cpl
== 3 && !(flags
& (CPL0FlagBit
<< FlagShift
)));
1134 CR0 cr0
= tc
->readMiscRegNoEffect(MISCREG_CR0
);
1136 bool badWrite
= (!tlb_entry
->writable
&& (inUser
|| cr0
.wp
));
1138 if ((inUser
&& !tlb_entry
->user
) ||
1139 (mode
== BaseTLB::Write
&& badWrite
)) {
1140 // The page must have been present to get into the TLB in
1141 // the first place. We'll assume the reserved bits are
1142 // fine even though we're not checking them.
1143 panic("Page fault detected");
1146 if (storeCheck
&& badWrite
) {
1147 // This would fault if this were a write, so return a page
1148 // fault that reflects that happening.
1149 panic("Page fault detected");
1154 * handleTranslationReturn is called on a TLB hit,
1155 * when a TLB miss returns or when a page fault returns.
1156 * The latter calls handelHit with TLB miss as tlbOutcome.
1159 GpuTLB::handleTranslationReturn(Addr virt_page_addr
, tlbOutcome tlb_outcome
,
1164 Addr vaddr
= pkt
->req
->getVaddr();
1166 TranslationState
*sender_state
=
1167 safe_cast
<TranslationState
*>(pkt
->senderState
);
1169 ThreadContext
*tc
= sender_state
->tc
;
1170 Mode mode
= sender_state
->tlbMode
;
1172 TlbEntry
*local_entry
, *new_entry
;
1174 if (tlb_outcome
== TLB_HIT
) {
1175 DPRINTF(GPUTLB
, "Translation Done - TLB Hit for addr %#x\n", vaddr
);
1176 local_entry
= sender_state
->tlbEntry
;
1178 DPRINTF(GPUTLB
, "Translation Done - TLB Miss for addr %#x\n",
1181 // We are returning either from a page walk or from a hit at a lower
1182 // TLB level. The senderState should be "carrying" a pointer to the
1183 // correct TLBEntry.
1184 new_entry
= sender_state
->tlbEntry
;
1186 local_entry
= new_entry
;
1188 if (allocationPolicy
) {
1189 DPRINTF(GPUTLB
, "allocating entry w/ addr %#x\n",
1192 local_entry
= insert(virt_page_addr
, *new_entry
);
1195 assert(local_entry
);
1199 * At this point the packet carries an up-to-date tlbEntry pointer
1200 * in its senderState.
1201 * Next step is to do the paging protection checks.
1203 DPRINTF(GPUTLB
, "Entry found with vaddr %#x, doing protection checks "
1204 "while paddr was %#x.\n", local_entry
->vaddr
,
1205 local_entry
->paddr
);
1207 pagingProtectionChecks(tc
, pkt
, local_entry
, mode
);
1208 int page_size
= local_entry
->size();
1209 Addr paddr
= local_entry
->paddr
| (vaddr
& (page_size
- 1));
1210 DPRINTF(GPUTLB
, "Translated %#x -> %#x.\n", vaddr
, paddr
);
1212 // Since this packet will be sent through the cpu side slave port,
1213 // it must be converted to a response pkt if it is not one already
1214 if (pkt
->isRequest()) {
1215 pkt
->makeTimingResponse();
1218 pkt
->req
->setPaddr(paddr
);
1220 if (local_entry
->uncacheable
) {
1221 pkt
->req
->setFlags(Request::UNCACHEABLE
);
1224 //send packet back to coalescer
1225 cpuSidePort
[0]->sendTimingResp(pkt
);
1226 //schedule cleanup event
1227 cleanupQueue
.push(virt_page_addr
);
1229 // schedule this only once per cycle.
1230 // The check is required because we might have multiple translations
1231 // returning the same cycle
1232 // this is a maximum priority event and must be on the same cycle
1233 // as the cleanup event in TLBCoalescer to avoid a race with
1234 // IssueProbeEvent caused by TLBCoalescer::MemSidePort::recvReqRetry
1235 if (!cleanupEvent
.scheduled())
1236 schedule(cleanupEvent
, curTick());
1240 * Here we take the appropriate actions based on the result of the
1244 GpuTLB::translationReturn(Addr virtPageAddr
, tlbOutcome outcome
,
1247 DPRINTF(GPUTLB
, "Triggered TLBEvent for addr %#x\n", virtPageAddr
);
1249 assert(translationReturnEvent
[virtPageAddr
]);
1252 TranslationState
*tmp_sender_state
=
1253 safe_cast
<TranslationState
*>(pkt
->senderState
);
1255 int req_cnt
= tmp_sender_state
->reqCnt
.back();
1256 bool update_stats
= !tmp_sender_state
->prefetch
;
1259 if (outcome
== TLB_HIT
) {
1260 handleTranslationReturn(virtPageAddr
, TLB_HIT
, pkt
);
1263 accessCycles
+= (req_cnt
* curTick());
1264 localCycles
+= curTick();
1267 } else if (outcome
== TLB_MISS
) {
1269 DPRINTF(GPUTLB
, "This is a TLB miss\n");
1271 accessCycles
+= (req_cnt
*curTick());
1272 localCycles
+= curTick();
1275 if (hasMemSidePort
) {
1276 // the one cyle added here represent the delay from when we get
1277 // the reply back till when we propagate it to the coalescer
1280 accessCycles
+= (req_cnt
* 1);
1285 * There is a TLB below. Send the coalesced request.
1286 * We actually send the very first packet of all the
1287 * pending packets for this virtual page address.
1289 if (!memSidePort
[0]->sendTimingReq(pkt
)) {
1290 DPRINTF(GPUTLB
, "Failed sending translation request to "
1291 "lower level TLB for addr %#x\n", virtPageAddr
);
1293 memSidePort
[0]->retries
.push_back(pkt
);
1295 DPRINTF(GPUTLB
, "Sent translation request to lower level "
1296 "TLB for addr %#x\n", virtPageAddr
);
1299 //this is the last level TLB. Start a page walk
1300 DPRINTF(GPUTLB
, "Last level TLB - start a page walk for "
1301 "addr %#x\n", virtPageAddr
);
1304 pageTableCycles
-= (req_cnt
*curTick());
1306 TLBEvent
*tlb_event
= translationReturnEvent
[virtPageAddr
];
1308 tlb_event
->updateOutcome(PAGE_WALK
);
1309 schedule(tlb_event
, curTick() + ticks(missLatency2
));
1311 } else if (outcome
== PAGE_WALK
) {
1313 pageTableCycles
+= (req_cnt
*curTick());
1315 // Need to access the page table and update the TLB
1316 DPRINTF(GPUTLB
, "Doing a page walk for address %#x\n",
1319 TranslationState
*sender_state
=
1320 safe_cast
<TranslationState
*>(pkt
->senderState
);
1322 Process
*p
= sender_state
->tc
->getProcessPtr();
1323 Addr vaddr
= pkt
->req
->getVaddr();
1325 Addr alignedVaddr
= p
->pTable
->pageAlign(vaddr
);
1326 assert(alignedVaddr
== virtPageAddr
);
1328 const EmulationPageTable::Entry
*pte
= p
->pTable
->lookup(vaddr
);
1329 if (!pte
&& sender_state
->tlbMode
!= BaseTLB::Execute
&&
1330 p
->fixupStackFault(vaddr
)) {
1331 pte
= p
->pTable
->lookup(vaddr
);
1335 DPRINTF(GPUTLB
, "Mapping %#x to %#x\n", alignedVaddr
,
1338 sender_state
->tlbEntry
=
1339 new TlbEntry(p
->pid(), virtPageAddr
, pte
->paddr
, false,
1342 sender_state
->tlbEntry
= nullptr;
1345 handleTranslationReturn(virtPageAddr
, TLB_MISS
, pkt
);
1346 } else if (outcome
== MISS_RETURN
) {
1347 /** we add an extra cycle in the return path of the translation
1348 * requests in between the various TLB levels.
1350 handleTranslationReturn(virtPageAddr
, TLB_MISS
, pkt
);
1352 panic("Unexpected TLB outcome %d", outcome
);
1357 GpuTLB::TLBEvent::process()
1359 tlb
->translationReturn(virtPageAddr
, outcome
, pkt
);
1363 GpuTLB::TLBEvent::description() const
1365 return "trigger translationDoneEvent";
1369 GpuTLB::TLBEvent::updateOutcome(tlbOutcome _outcome
)
1375 GpuTLB::TLBEvent::getTLBEventVaddr()
1377 return virtPageAddr
;
1381 * recvTiming receives a coalesced timing request from a TLBCoalescer
1382 * and it calls issueTLBLookup()
1383 * It only rejects the packet if we have exceeded the max
1384 * outstanding number of requests for the TLB
1387 GpuTLB::CpuSidePort::recvTimingReq(PacketPtr pkt
)
1389 if (tlb
->outstandingReqs
< tlb
->maxCoalescedReqs
) {
1390 tlb
->issueTLBLookup(pkt
);
1391 // update number of outstanding translation requests
1392 tlb
->outstandingReqs
++;
1395 DPRINTF(GPUTLB
, "Reached maxCoalescedReqs number %d\n",
1396 tlb
->outstandingReqs
);
1402 * handleFuncTranslationReturn is called on a TLB hit,
1403 * when a TLB miss returns or when a page fault returns.
1404 * It updates LRU, inserts the TLB entry on a miss
1405 * depending on the allocation policy and does the required
1406 * protection checks. It does NOT create a new packet to
1407 * update the packet's addr; this is done in hsail-gpu code.
// Functional-mode completion handler, called on a TLB hit, on return
// from a lower-level TLB/page walk, or after a page fault. Fetches the
// TlbEntry carried in the packet's senderState, optionally inserts it
// into this TLB (allocationPolicy), runs protection checks for
// non-prefetch accesses, and writes the translated physical address
// (plus UNCACHEABLE flag if applicable) into pkt->req.
// NOTE(review): several continuation lines (DPRINTF arguments, braces)
// are missing from this extract.
1410 GpuTLB::handleFuncTranslationReturn(PacketPtr pkt
, tlbOutcome tlb_outcome
)
// Recover the translation bookkeeping attached by the requester.
1412 TranslationState
*sender_state
=
1413 safe_cast
<TranslationState
*>(pkt
->senderState
);
1415 ThreadContext
*tc
= sender_state
->tc
;
1416 Mode mode
= sender_state
->tlbMode
;
1417 Addr vaddr
= pkt
->req
->getVaddr();
1419 TlbEntry
*local_entry
, *new_entry
;
// Hit in this TLB: the senderState already points at our entry.
1421 if (tlb_outcome
== TLB_HIT
) {
1422 DPRINTF(GPUTLB
, "Functional Translation Done - TLB hit for addr "
1425 local_entry
= sender_state
->tlbEntry
;
// Miss path: entry came back from below.
1427 DPRINTF(GPUTLB
, "Functional Translation Done - TLB miss for addr "
1430 // We are returning either from a page walk or from a hit at a lower
1431 // TLB level. The senderState should be "carrying" a pointer to the
1432 // correct TLBEntry.
1433 new_entry
= sender_state
->tlbEntry
;
1435 local_entry
= new_entry
;
// Allocate-on-miss: copy the returned entry into this level.
1437 if (allocationPolicy
) {
1438 Addr virt_page_addr
= roundDown(vaddr
, TheISA::PageBytes
);
1440 DPRINTF(GPUTLB
, "allocating entry w/ addr %#x\n",
1443 local_entry
= insert(virt_page_addr
, *new_entry
);
1446 assert(local_entry
);
1449 DPRINTF(GPUTLB
, "Entry found with vaddr %#x, doing protection checks "
1450 "while paddr was %#x.\n", local_entry
->vaddr
,
1451 local_entry
->paddr
);
1454 * Do paging checks if it's a normal functional access. If it's for a
1455 * prefetch, then sometimes you can try to prefetch something that
1456 * won't pass protection. We don't actually want to fault because there
1457 * is no demand access to deem this a violation. Just put it in the
1458 * TLB and it will fault if indeed a future demand access touches it in
1461 * This feature could be used to explore security issues around
1462 * speculative memory accesses.
1464 if (!sender_state
->prefetch
&& sender_state
->tlbEntry
)
1465 pagingProtectionChecks(tc
, pkt
, local_entry
, mode
);
// Compose the physical address: page frame | in-page offset.
1467 int page_size
= local_entry
->size();
1468 Addr paddr
= local_entry
->paddr
| (vaddr
& (page_size
- 1));
1469 DPRINTF(GPUTLB
, "Translated %#x -> %#x.\n", vaddr
, paddr
);
1471 pkt
->req
->setPaddr(paddr
);
// Propagate uncacheability from the TLB entry to the request.
1473 if (local_entry
->uncacheable
)
1474 pkt
->req
->setFlags(Request::UNCACHEABLE
);
1477 // This is used for atomic translations. Need to
1478 // make it all happen during the same cycle.
// Functional (atomic) translation path — everything happens in the same
// cycle, with no coalescing. Performs a stats-aware lookup in this TLB;
// on a miss it either forwards functionally to the TLB below
// (hasMemSidePort) or walks the emulated page table directly, building
// a TlbEntry in senderState. Finally delegates to
// handleFuncTranslationReturn() to set pkt->req's paddr.
// NOTE(review): many continuation lines (braces, early returns,
// DPRINTF/constructor argument tails) are missing from this extract.
1480 GpuTLB::CpuSidePort::recvFunctional(PacketPtr pkt
)
1482 TranslationState
*sender_state
=
1483 safe_cast
<TranslationState
*>(pkt
->senderState
);
1485 ThreadContext
*tc
= sender_state
->tc
;
// Prefetches do not perturb the statistics.
1486 bool update_stats
= !sender_state
->prefetch
;
1488 Addr virt_page_addr
= roundDown(pkt
->req
->getVaddr(),
// Track per-page access pattern for this TLB.
1492 tlb
->updatePageFootprint(virt_page_addr
);
1494 // do the TLB lookup without updating the stats
1495 bool success
= tlb
->tlbLookup(pkt
->req
, tc
, update_stats
);
1496 tlbOutcome tlb_outcome
= success
? TLB_HIT
: TLB_MISS
;
1498 // functional mode means no coalescing
1499 // global metrics are the same as the local metrics
1501 tlb
->globalNumTLBAccesses
++;
// Hit: record at which level of the hierarchy we hit.
1504 sender_state
->hitLevel
= sender_state
->reqCnt
.size();
1505 tlb
->globalNumTLBHits
++;
// Miss: either forward below or walk the page table here.
1511 tlb
->globalNumTLBMisses
++;
1512 if (tlb
->hasMemSidePort
) {
1513 // there is a TLB below -> propagate down the TLB hierarchy
1514 tlb
->memSidePort
[0]->sendFunctional(pkt
);
1515 // If no valid translation from a prefetch, then just return
1516 if (sender_state
->prefetch
&& !pkt
->req
->hasPaddr())
1519 // Need to access the page table and update the TLB
1520 DPRINTF(GPUTLB
, "Doing a page walk for address %#x\n",
1523 Process
*p
= tc
->getProcessPtr();
1525 Addr vaddr
= pkt
->req
->getVaddr();
1527 Addr alignedVaddr
= p
->pTable
->pageAlign(vaddr
);
1528 assert(alignedVaddr
== virt_page_addr
);
// First page-table lookup; a failed non-execute access may be a
// stack growth that fixupStackFault can repair, then retry.
1531 const EmulationPageTable::Entry
*pte
=
1532 p
->pTable
->lookup(vaddr
);
1533 if (!pte
&& sender_state
->tlbMode
!= BaseTLB::Execute
&&
1534 p
->fixupStackFault(vaddr
)) {
1535 pte
= p
->pTable
->lookup(vaddr
);
1538 if (!sender_state
->prefetch
) {
1539 // no PageFaults are permitted after
1540 // the second page table lookup
1543 DPRINTF(GPUTLB
, "Mapping %#x to %#x\n", alignedVaddr
,
// Demand access: build the entry from the PTE.
1546 sender_state
->tlbEntry
=
1547 new TlbEntry(p
->pid(), virt_page_addr
,
1548 pte
->paddr
, false, false);
1550 // If this was a prefetch, then do the normal thing if it
1551 // was a successful translation. Otherwise, send an empty
1552 // TLB entry back so that it can be figured out as empty and
1553 // handled accordingly.
1555 DPRINTF(GPUTLB
, "Mapping %#x to %#x\n", alignedVaddr
,
1558 sender_state
->tlbEntry
=
1559 new TlbEntry(p
->pid(), virt_page_addr
,
1560 pte
->paddr
, false, false);
// Failed prefetch: report an empty (null) entry.
1562 DPRINTF(GPUPrefetch
, "Prefetch failed %#x\n",
1565 sender_state
->tlbEntry
= nullptr;
// Prefetch hit: duplicate the cached entry into senderState.
1572 DPRINTF(GPUPrefetch
, "Functional Hit for vaddr %#x\n",
1573 tlb
->lookup(pkt
->req
->getVaddr()));
1575 TlbEntry
*entry
= tlb
->lookup(pkt
->req
->getVaddr(),
1580 auto p
= sender_state
->tc
->getProcessPtr();
1581 sender_state
->tlbEntry
=
1582 new TlbEntry(p
->pid(), entry
->vaddr
, entry
->paddr
,
1585 // This is the function that would populate pkt->req with the paddr of
1586 // the translation. But if no translation happens (i.e Prefetch fails)
1587 // then the early returns in the above code will keep this function
1589 tlb
->handleFuncTranslationReturn(pkt
, tlb_outcome
);
1593 GpuTLB::CpuSidePort::recvReqRetry()
1595 // The CPUSidePort never sends anything but replies. No retries
1597 panic("recvReqRetry called");
1601 GpuTLB::CpuSidePort::getAddrRanges() const
1603 // currently not checked by the master
1604 AddrRangeList ranges
;
1610 * MemSidePort receives the packet back.
1611 * We need to call the handleTranslationReturn
1612 * and propagate up the hierarchy.
// Timing response from the memory side (the TLB below / page walker):
// look up the pending TLBEvent for this virtual page, mark it as a
// MISS_RETURN, and schedule it one TLB tick from now so the result
// propagates back up the hierarchy.
// NOTE(review): the roundDown second argument, DPRINTF argument tail,
// and the return statement are not visible in this extract.
1615 GpuTLB::MemSidePort::recvTimingResp(PacketPtr pkt
)
1617 Addr virt_page_addr
= roundDown(pkt
->req
->getVaddr(),
1620 DPRINTF(GPUTLB
, "MemSidePort recvTiming for virt_page_addr %#x\n",
// The event must exist and must match the page we requested.
1623 TLBEvent
*tlb_event
= tlb
->translationReturnEvent
[virt_page_addr
];
1625 assert(virt_page_addr
== tlb_event
->getTLBEventVaddr());
1627 tlb_event
->updateOutcome(MISS_RETURN
);
1628 tlb
->schedule(tlb_event
, curTick()+tlb
->ticks(1));
1634 GpuTLB::MemSidePort::recvReqRetry()
1636 // No retries should reach the TLB. The retries
1637 // should only reach the TLBCoalescer.
1638 panic("recvReqRetry called");
// NOTE(review): this is the visible body of GpuTLB's cleanup routine;
// its signature line is missing from this extract. Drains the
// cleanupQueue, destroying and unmapping each completed translation
// event, then pokes every CPU-side port so the coalescer above can
// retry any requests it had to hold back.
1644 while (!cleanupQueue
.empty()) {
1645 Addr cleanup_addr
= cleanupQueue
.front();
// Retire the completed event for this page and drop its map slot.
1649 TLBEvent
* old_tlb_event
= translationReturnEvent
[cleanup_addr
];
1650 delete old_tlb_event
;
1651 translationReturnEvent
.erase(cleanup_addr
);
1653 // update number of outstanding requests
1657 /** the higher level coalescer should retry if it has
1658 * any pending requests.
1660 for (int i
= 0; i
< cpuSidePort
.size(); ++i
) {
1661 cpuSidePort
[i
]->sendRetryReq();
// Update the per-page access bookkeeping (TLBFootprint) for one page:
// insert a zeroed AccessInfo on first touch, otherwise accumulate the
// reuse distance (ticks since last access); always bump the access
// count and timestamp, and — when accessDistance profiling is on —
// record the running local access counter for later distance analysis
// in exitCallback().
1666 GpuTLB::updatePageFootprint(Addr virt_page_addr
)
1669 std::pair
<AccessPatternTable::iterator
, bool> ret
;
// Fresh stats record used only if this page is new to the table.
1671 AccessInfo tmp_access_info
;
1672 tmp_access_info
.lastTimeAccessed
= 0;
1673 tmp_access_info
.accessesPerPage
= 0;
1674 tmp_access_info
.totalReuseDistance
= 0;
1675 tmp_access_info
.sumDistance
= 0;
1676 tmp_access_info
.meanDistance
= 0;
1678 ret
= TLBFootprint
.insert(AccessPatternTable::value_type(virt_page_addr
,
// insert().second tells us whether the page was seen before.
1681 bool first_page_access
= ret
.second
;
1683 if (first_page_access
) {
// Revisit: fold the time since the previous access into the
// page's total reuse distance.
1686 int accessed_before
;
1687 accessed_before
= curTick() - ret
.first
->second
.lastTimeAccessed
;
1688 ret
.first
->second
.totalReuseDistance
+= accessed_before
;
1691 ret
.first
->second
.accessesPerPage
++;
1692 ret
.first
->second
.lastTimeAccessed
= curTick();
1694 if (accessDistance
) {
1695 ret
.first
->second
.localTLBAccesses
1696 .push_back(localNumTLBAccesses
.value());
// End-of-simulation hook: computes the average reuse distance across
// all pages seen by this TLB and, when accessDistance profiling is
// enabled, converts each page's raw access-counter samples into
// distances and emits per-page max/mean/stddev statistics as CSV into
// a file in the gem5 output directory. Clears TLBFootprint at the end.
// NOTE(review): several continuation lines (braces, variable `diff`'s
// declaration, assignment targets around lines 1768/1786) are missing
// from this extract.
1701 GpuTLB::exitCallback()
1703 std::ostream
*page_stat_file
= nullptr;
1705 if (accessDistance
) {
1707 // print per page statistics to a separate file (.csv format)
1708 // simout is the gem5 output directory (default is m5out or the one
1709 // specified with -d
1710 page_stat_file
= simout
.create(name().c_str())->stream();
// CSV header row.
1713 *page_stat_file
<< "page,max_access_distance,mean_access_distance, "
1714 << "stddev_distance" << std::endl
;
1717 // update avg. reuse distance footprint
1718 AccessPatternTable::iterator iter
, iter_begin
, iter_end
;
1719 unsigned int sum_avg_reuse_distance_per_page
= 0;
1721 // iterate through all pages seen by this TLB
1722 for (iter
= TLBFootprint
.begin(); iter
!= TLBFootprint
.end(); iter
++) {
1723 sum_avg_reuse_distance_per_page
+= iter
->second
.totalReuseDistance
/
1724 iter
->second
.accessesPerPage
;
1726 if (accessDistance
) {
// First pass: turn absolute access counters into distances
// relative to the page's first access, accumulating their sum.
1727 unsigned int tmp
= iter
->second
.localTLBAccesses
[0];
1728 unsigned int prev
= tmp
;
1730 for (int i
= 0; i
< iter
->second
.localTLBAccesses
.size(); ++i
) {
1735 prev
= iter
->second
.localTLBAccesses
[i
];
1736 // update the localTLBAccesses value
1737 // with the actual difference
1738 iter
->second
.localTLBAccesses
[i
] -= tmp
;
1739 // compute the sum of AccessDistance per page
1740 // used later for mean
1741 iter
->second
.sumDistance
+=
1742 iter
->second
.localTLBAccesses
[i
];
1745 iter
->second
.meanDistance
=
1746 iter
->second
.sumDistance
/ iter
->second
.accessesPerPage
;
1748 // compute std_dev and max (we need a second round because we
1749 // need to know the mean value
1750 unsigned int max_distance
= 0;
1751 unsigned int stddev_distance
= 0;
// Second pass: max and standard deviation of the distances.
1753 for (int i
= 0; i
< iter
->second
.localTLBAccesses
.size(); ++i
) {
1754 unsigned int tmp_access_distance
=
1755 iter
->second
.localTLBAccesses
[i
];
1757 if (tmp_access_distance
> max_distance
) {
1758 max_distance
= tmp_access_distance
;
1762 tmp_access_distance
- iter
->second
.meanDistance
;
1763 stddev_distance
+= pow(diff
, 2);
1768 sqrt(stddev_distance
/iter
->second
.accessesPerPage
);
// Emit one CSV row per page: hex page addr, then decimal stats.
1770 if (page_stat_file
) {
1771 *page_stat_file
<< std::hex
<< iter
->first
<< ",";
1772 *page_stat_file
<< std::dec
<< max_distance
<< ",";
1773 *page_stat_file
<< std::dec
<< iter
->second
.meanDistance
1775 *page_stat_file
<< std::dec
<< stddev_distance
;
1776 *page_stat_file
<< std::endl
;
1779 // erase the localTLBAccesses array
1780 iter
->second
.localTLBAccesses
.clear();
// Fold the per-page averages into a single TLB-wide figure.
1784 if (!TLBFootprint
.empty()) {
1786 sum_avg_reuse_distance_per_page
/ TLBFootprint
.size();
1789 //clear the TLBFootprint map
1790 TLBFootprint
.clear();
1792 } // namespace X86ISA
// SimObject factory hook: constructs the GpuTLB from its
// Python-generated parameter struct.
// NOTE(review): the return type line and closing brace fall outside
// this extract.
1795 X86GPUTLBParams::create()
1797 return new X86ISA::GpuTLB(this);