/*
 * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
#include "gpu-compute/gpu_tlb.hh"

#include "arch/x86/faults.hh"
#include "arch/x86/insts/microldstop.hh"
#include "arch/x86/pagetable.hh"
#include "arch/x86/pagetable_walker.hh"
#include "arch/x86/regs/misc.hh"
#include "arch/x86/x86_traits.hh"
#include "base/bitfield.hh"
#include "base/logging.hh"
#include "base/output.hh"
#include "base/trace.hh"
#include "cpu/base.hh"
#include "cpu/thread_context.hh"
#include "debug/GPUPrefetch.hh"
#include "debug/GPUTLB.hh"
#include "mem/packet_access.hh"
#include "mem/page_table.hh"
#include "mem/request.hh"
#include "sim/process.hh"

namespace X86ISA
{
GpuTLB::GpuTLB(const Params *p)
    : ClockedObject(p), configAddress(0), size(p->size),
      cleanupEvent([this]{ cleanup(); }, name(), false,
                   Event::Maximum_Pri),
      exitEvent([this]{ exitCallback(); }, name())
{
    assert(assoc <= size);

    allocationPolicy = p->allocationPolicy;
    hasMemSidePort = false;
    accessDistance = p->accessDistance;
    clock = p->clk_domain->clockPeriod();

    tlb.assign(size, TlbEntry());

    freeList.resize(numSets);
    entryList.resize(numSets);

    for (int set = 0; set < numSets; ++set) {
        for (int way = 0; way < assoc; ++way) {
            int x = set * assoc + way;
            freeList[set].push_back(&tlb.at(x));
        }
    }
    /**
     * @warning: the set-associative version assumes you have a
     * fixed page size of 4KB.
     * If the page size is greater than 4KB (as defined in
     * TheISA::PageBytes), then there are various issues w/ the current
     * implementation (you'd have the same 8KB page being replicated in
     * different sets, etc.).
     */
    setMask = numSets - 1;

    // GpuTLB doesn't yet support full system
    walker->setTLB(this);

    maxCoalescedReqs = p->maxOutstandingReqs;

    // Do not allow maxCoalescedReqs to be more than the TLB associativity
    if (maxCoalescedReqs > assoc) {
        maxCoalescedReqs = assoc;
        cprintf("Forcing maxCoalescedReqs to %d (TLB assoc.) \n", assoc);
    }

    hitLatency = p->hitLatency;
    missLatency1 = p->missLatency1;
    missLatency2 = p->missLatency2;
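    // Note: as used later in this file, hitLatency is charged on a hit at
    // this level, missLatency1 on a miss at this level, and missLatency2 is
    // the extra penalty for walking the page table (see translate() and
    // translationReturn() below).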
    // create the slave ports based on the number of connected ports
    for (size_t i = 0; i < p->port_slave_connection_count; ++i) {
        cpuSidePort.push_back(new CpuSidePort(csprintf("%s-port%d",
                              name(), i), this, i));
    }

    // create the master ports based on the number of connected ports
    for (size_t i = 0; i < p->port_master_connection_count; ++i) {
        memSidePort.push_back(new MemSidePort(csprintf("%s-port%d",
                              name(), i), this, i));
    }
}
// fixme: this is never called?
GpuTLB::~GpuTLB()
{
    // make sure all the hash-maps are empty
    assert(translationReturnEvent.empty());
}
Port &
GpuTLB::getPort(const std::string &if_name, PortID idx)
{
    if (if_name == "slave") {
        if (idx >= static_cast<PortID>(cpuSidePort.size())) {
            panic("TLBCoalescer::getPort: unknown index %d\n", idx);
        }

        return *cpuSidePort[idx];
    } else if (if_name == "master") {
        if (idx >= static_cast<PortID>(memSidePort.size())) {
            panic("TLBCoalescer::getPort: unknown index %d\n", idx);
        }

        hasMemSidePort = true;

        return *memSidePort[idx];
    }

    panic("TLBCoalescer::getPort: unknown port %s\n", if_name);
}
TlbEntry*
GpuTLB::insert(Addr vpn, TlbEntry &entry)
{
    TlbEntry *newEntry = nullptr;

    /**
     * vpn holds the virtual page address
     * The least significant bits are simply masked
     */
    int set = (vpn >> TheISA::PageShift) & setMask;

    if (!freeList[set].empty()) {
        newEntry = freeList[set].front();
        freeList[set].pop_front();
    } else {
        newEntry = entryList[set].back();
        entryList[set].pop_back();
    }

    *newEntry = entry;
    newEntry->vaddr = vpn;
    entryList[set].push_front(newEntry);

    return newEntry;
}
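// Note on the replacement policy used by insert() above: a free entry in the
// set is taken first; otherwise the LRU entry at the back of entryList[set]
// is recycled, and the (re)filled entry is pushed to the front, i.e. the MRU
// position.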
GpuTLB::EntryList::iterator
GpuTLB::lookupIt(Addr va, bool update_lru)
{
    int set = (va >> TheISA::PageShift) & setMask;

    auto entry = entryList[set].begin();
    for (; entry != entryList[set].end(); ++entry) {
        int page_size = (*entry)->size();

        if ((*entry)->vaddr <= va && (*entry)->vaddr + page_size > va) {
            DPRINTF(GPUTLB, "Matched vaddr %#x to entry starting at %#x "
                    "with size %#x.\n", va, (*entry)->vaddr, page_size);

            if (update_lru) {
                entryList[set].push_front(*entry);
                entryList[set].erase(entry);
                entry = entryList[set].begin();
            }

            break;
        }
    }

    return entry;
}
TlbEntry*
GpuTLB::lookup(Addr va, bool update_lru)
{
    int set = (va >> TheISA::PageShift) & setMask;

    auto entry = lookupIt(va, update_lru);

    if (entry == entryList[set].end())
        return nullptr;
    else
        return *entry;
}
void
GpuTLB::invalidateAll()
{
    DPRINTF(GPUTLB, "Invalidating all entries.\n");

    for (int i = 0; i < numSets; ++i) {
        while (!entryList[i].empty()) {
            TlbEntry *entry = entryList[i].front();
            entryList[i].pop_front();
            freeList[i].push_back(entry);
        }
    }
}
void
GpuTLB::setConfigAddress(uint32_t addr)
{
    configAddress = addr;
}
void
GpuTLB::invalidateNonGlobal()
{
    DPRINTF(GPUTLB, "Invalidating all non global entries.\n");

    for (int i = 0; i < numSets; ++i) {
        for (auto entryIt = entryList[i].begin();
             entryIt != entryList[i].end();) {
            if (!(*entryIt)->global) {
                freeList[i].push_back(*entryIt);
                entryList[i].erase(entryIt++);
            } else {
                ++entryIt;
            }
        }
    }
}
void
GpuTLB::demapPage(Addr va, uint64_t asn)
{
    int set = (va >> TheISA::PageShift) & setMask;
    auto entry = lookupIt(va, false);

    if (entry != entryList[set].end()) {
        freeList[set].push_back(*entry);
        entryList[set].erase(entry);
    }
}
Fault
GpuTLB::translateInt(const RequestPtr &req, ThreadContext *tc)
{
    DPRINTF(GPUTLB, "Addresses references internal memory.\n");
    Addr vaddr = req->getVaddr();
    Addr prefix = (vaddr >> 3) & IntAddrPrefixMask;

    if (prefix == IntAddrPrefixCPUID) {
        panic("CPUID memory space not yet implemented!\n");
    } else if (prefix == IntAddrPrefixMSR) {
        req->setFlags(Request::MMAPPED_IPR);

        switch (vaddr & ~IntAddrPrefixMask) {
            regNum = MISCREG_TSC;
            regNum = MISCREG_APIC_BASE;
            regNum = MISCREG_MTRRCAP;
            regNum = MISCREG_SYSENTER_CS;
            regNum = MISCREG_SYSENTER_ESP;
            regNum = MISCREG_SYSENTER_EIP;
            regNum = MISCREG_MCG_CAP;
            regNum = MISCREG_MCG_STATUS;
            regNum = MISCREG_MCG_CTL;
            regNum = MISCREG_DEBUG_CTL_MSR;
            regNum = MISCREG_LAST_BRANCH_FROM_IP;
            regNum = MISCREG_LAST_BRANCH_TO_IP;
            regNum = MISCREG_LAST_EXCEPTION_FROM_IP;
            regNum = MISCREG_LAST_EXCEPTION_TO_IP;
            regNum = MISCREG_MTRR_PHYS_BASE_0;
            regNum = MISCREG_MTRR_PHYS_MASK_0;
            regNum = MISCREG_MTRR_PHYS_BASE_1;
            regNum = MISCREG_MTRR_PHYS_MASK_1;
            regNum = MISCREG_MTRR_PHYS_BASE_2;
            regNum = MISCREG_MTRR_PHYS_MASK_2;
            regNum = MISCREG_MTRR_PHYS_BASE_3;
            regNum = MISCREG_MTRR_PHYS_MASK_3;
            regNum = MISCREG_MTRR_PHYS_BASE_4;
            regNum = MISCREG_MTRR_PHYS_MASK_4;
            regNum = MISCREG_MTRR_PHYS_BASE_5;
            regNum = MISCREG_MTRR_PHYS_MASK_5;
            regNum = MISCREG_MTRR_PHYS_BASE_6;
            regNum = MISCREG_MTRR_PHYS_MASK_6;
            regNum = MISCREG_MTRR_PHYS_BASE_7;
            regNum = MISCREG_MTRR_PHYS_MASK_7;
            regNum = MISCREG_MTRR_FIX_64K_00000;
            regNum = MISCREG_MTRR_FIX_16K_80000;
            regNum = MISCREG_MTRR_FIX_16K_A0000;
            regNum = MISCREG_MTRR_FIX_4K_C0000;
            regNum = MISCREG_MTRR_FIX_4K_C8000;
            regNum = MISCREG_MTRR_FIX_4K_D0000;
            regNum = MISCREG_MTRR_FIX_4K_D8000;
            regNum = MISCREG_MTRR_FIX_4K_E0000;
            regNum = MISCREG_MTRR_FIX_4K_E8000;
            regNum = MISCREG_MTRR_FIX_4K_F0000;
            regNum = MISCREG_MTRR_FIX_4K_F8000;
            regNum = MISCREG_PAT;
            regNum = MISCREG_DEF_TYPE;
            regNum = MISCREG_MC0_CTL;
            regNum = MISCREG_MC1_CTL;
            regNum = MISCREG_MC2_CTL;
            regNum = MISCREG_MC3_CTL;
            regNum = MISCREG_MC4_CTL;
            regNum = MISCREG_MC5_CTL;
            regNum = MISCREG_MC6_CTL;
            regNum = MISCREG_MC7_CTL;
            regNum = MISCREG_MC0_STATUS;
            regNum = MISCREG_MC1_STATUS;
            regNum = MISCREG_MC2_STATUS;
            regNum = MISCREG_MC3_STATUS;
            regNum = MISCREG_MC4_STATUS;
            regNum = MISCREG_MC5_STATUS;
            regNum = MISCREG_MC6_STATUS;
            regNum = MISCREG_MC7_STATUS;
            regNum = MISCREG_MC0_ADDR;
            regNum = MISCREG_MC1_ADDR;
            regNum = MISCREG_MC2_ADDR;
            regNum = MISCREG_MC3_ADDR;
            regNum = MISCREG_MC4_ADDR;
            regNum = MISCREG_MC5_ADDR;
            regNum = MISCREG_MC6_ADDR;
            regNum = MISCREG_MC7_ADDR;
            regNum = MISCREG_MC0_MISC;
            regNum = MISCREG_MC1_MISC;
            regNum = MISCREG_MC2_MISC;
            regNum = MISCREG_MC3_MISC;
            regNum = MISCREG_MC4_MISC;
            regNum = MISCREG_MC5_MISC;
            regNum = MISCREG_MC6_MISC;
            regNum = MISCREG_MC7_MISC;
            regNum = MISCREG_EFER;
            regNum = MISCREG_STAR;
            regNum = MISCREG_LSTAR;
            regNum = MISCREG_CSTAR;
            regNum = MISCREG_SF_MASK;
            regNum = MISCREG_FS_BASE;
            regNum = MISCREG_GS_BASE;
            regNum = MISCREG_KERNEL_GS_BASE;
            regNum = MISCREG_TSC_AUX;
            regNum = MISCREG_PERF_EVT_SEL0;
            regNum = MISCREG_PERF_EVT_SEL1;
            regNum = MISCREG_PERF_EVT_SEL2;
            regNum = MISCREG_PERF_EVT_SEL3;
            regNum = MISCREG_PERF_EVT_CTR0;
            regNum = MISCREG_PERF_EVT_CTR1;
            regNum = MISCREG_PERF_EVT_CTR2;
            regNum = MISCREG_PERF_EVT_CTR3;
            regNum = MISCREG_SYSCFG;
            regNum = MISCREG_IORR_BASE0;
            regNum = MISCREG_IORR_BASE1;
            regNum = MISCREG_IORR_MASK0;
            regNum = MISCREG_IORR_MASK1;
            regNum = MISCREG_TOP_MEM;
            regNum = MISCREG_TOP_MEM2;
            regNum = MISCREG_VM_CR;
            regNum = MISCREG_IGNNE;
            regNum = MISCREG_SMM_CTL;
            regNum = MISCREG_VM_HSAVE_PA;
            return std::make_shared<GeneralProtection>(0);
        }

        //The index is multiplied by the size of a MiscReg so that
        //any memory dependence calculations will not see these as
        //overlapping.
        req->setPaddr(regNum * sizeof(RegVal));
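        // Note: assuming an 8-byte RegVal, successive MSRs land 8 bytes
        // apart in this local MMAPPED_IPR space (e.g. regNum 3 -> paddr 24),
        // which keeps the emulated registers from aliasing one another.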
    } else if (prefix == IntAddrPrefixIO) {
        // TODO If CPL > IOPL or in virtual mode, check the I/O permission
        // bitmap in the TSS.

        Addr IOPort = vaddr & ~IntAddrPrefixMask;
        // Make sure the address fits in the expected 16 bit IO address
        // space.
        assert(!(IOPort & ~0xFFFF));

        if (IOPort == 0xCF8 && req->getSize() == 4) {
            req->setFlags(Request::MMAPPED_IPR);
            req->setPaddr(MISCREG_PCI_CONFIG_ADDRESS * sizeof(RegVal));
        } else if ((IOPort & ~mask(2)) == 0xCFC) {
            req->setFlags(Request::UNCACHEABLE);

            Addr configAddress =
                tc->readMiscRegNoEffect(MISCREG_PCI_CONFIG_ADDRESS);

            if (bits(configAddress, 31, 31)) {
                req->setPaddr(PhysAddrPrefixPciConfig |
                              mbits(configAddress, 30, 2) |
                              (IOPort & mask(2)));
            } else {
                req->setPaddr(PhysAddrPrefixIO | IOPort);
            }
        } else {
            req->setFlags(Request::UNCACHEABLE);
            req->setPaddr(PhysAddrPrefixIO | IOPort);
        }
    } else {
        panic("Access to unrecognized internal address space %#x.\n",
              prefix);
    }

    return NoFault;
}
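// Note on the I/O decoding in translateInt(): it follows the standard x86
// PCI configuration mechanism, where port 0xCF8 holds the 32-bit config
// address (bit 31 enables config cycles) and ports 0xCFC-0xCFF access the
// selected config register; all other ports map straight into the I/O space.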
/**
 * tlbLookup only performs a TLB lookup, returning true on a TLB hit
 * and false on a TLB miss.
 * Many of the checks about different modes have been converted to
 * assertions, since these parts of the code are not really used.
 * On a hit it will update the LRU stack.
 */
bool
GpuTLB::tlbLookup(const RequestPtr &req,
                  ThreadContext *tc, bool update_stats)
{
    bool tlb_hit = false;

    uint32_t flags = req->getFlags();
    int seg = flags & SegmentFlagMask;

    assert(seg != SEGMENT_REG_MS);
    Addr vaddr = req->getVaddr();
    DPRINTF(GPUTLB, "TLB Lookup for vaddr %#x.\n", vaddr);
    HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);

    if (m5Reg.prot) {
        DPRINTF(GPUTLB, "In protected mode.\n");
        // make sure we are in 64-bit mode
        assert(m5Reg.mode == LongMode);

        // If paging is enabled, do the translation.
        if (m5Reg.paging) {
            DPRINTF(GPUTLB, "Paging enabled.\n");
            //update LRU stack on a hit
            TlbEntry *entry = lookup(vaddr, true);

            if (entry)
                tlb_hit = true;

            // functional tlb access for memory initialization
            // i.e., memory seeding or instr. seeding -> don't update
            // the stats
            if (update_stats)
                localNumTLBAccesses++;
        }
    }

    return tlb_hit;
}
Fault
GpuTLB::translate(const RequestPtr &req, ThreadContext *tc,
                  Translation *translation, Mode mode,
                  bool &delayedResponse, bool timing, int &latency)
{
    uint32_t flags = req->getFlags();
    int seg = flags & SegmentFlagMask;
    bool storeCheck = flags & (StoreCheck << FlagShift);

    // If this is true, we're dealing with a request
    // to a non-memory address space.
    if (seg == SEGMENT_REG_MS) {
        return translateInt(req, tc);
    }

    delayedResponse = false;
    Addr vaddr = req->getVaddr();
    DPRINTF(GPUTLB, "Translating vaddr %#x.\n", vaddr);

    HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
    // If protected mode has been enabled...
    if (m5Reg.prot) {
        DPRINTF(GPUTLB, "In protected mode.\n");
        // If we're not in 64-bit mode, do protection/limit checks
        if (m5Reg.mode != LongMode) {
            DPRINTF(GPUTLB, "Not in long mode. Checking segment "
                    "protection.\n");

            // Check for a null segment selector.
            if (!(seg == SEGMENT_REG_TSG || seg == SYS_SEGMENT_REG_IDTR ||
                  seg == SEGMENT_REG_HS || seg == SEGMENT_REG_LS)
                && !tc->readMiscRegNoEffect(MISCREG_SEG_SEL(seg))) {
                return std::make_shared<GeneralProtection>(0);
            }

            bool expandDown = false;
            SegAttr attr = tc->readMiscRegNoEffect(MISCREG_SEG_ATTR(seg));

            if (seg >= SEGMENT_REG_ES && seg <= SEGMENT_REG_HS) {
                if (!attr.writable && (mode == BaseTLB::Write ||
                    storeCheck))
                    return std::make_shared<GeneralProtection>(0);

                if (!attr.readable && mode == BaseTLB::Read)
                    return std::make_shared<GeneralProtection>(0);

                expandDown = attr.expandDown;
            }

            Addr base = tc->readMiscRegNoEffect(MISCREG_SEG_BASE(seg));
            Addr limit = tc->readMiscRegNoEffect(MISCREG_SEG_LIMIT(seg));
            // This assumes we're not in 64 bit mode. If we were, the
            // default address size is 64 bits, overridable to 32.
            int size = 32;
            bool sizeOverride = (flags & (AddrSizeFlagBit << FlagShift));
            SegAttr csAttr = tc->readMiscRegNoEffect(MISCREG_CS_ATTR);

            if ((csAttr.defaultSize && sizeOverride) ||
                (!csAttr.defaultSize && !sizeOverride)) {
                size = 16;
            }

            Addr offset = bits(vaddr - base, size - 1, 0);
            Addr endOffset = offset + req->getSize() - 1;

            if (expandDown) {
                DPRINTF(GPUTLB, "Checking an expand down segment.\n");
                warn_once("Expand down segments are untested.\n");

                if (offset <= limit || endOffset <= limit)
                    return std::make_shared<GeneralProtection>(0);
            } else {
                if (offset > limit || endOffset > limit)
                    return std::make_shared<GeneralProtection>(0);
            }
        }
        // If paging is enabled, do the translation.
        if (m5Reg.paging) {
            DPRINTF(GPUTLB, "Paging enabled.\n");
            // The vaddr already has the segment base applied.
            TlbEntry *entry = lookup(vaddr);
            localNumTLBAccesses++;

            if (!entry) {
                latency = missLatency1;

                if (FullSystem) {
                    fatal("GpuTLB doesn't support full-system mode\n");
                } else {
                    DPRINTF(GPUTLB, "Handling a TLB miss for address %#x "
                            "at pc %#x.\n", vaddr, tc->instAddr());

                    Process *p = tc->getProcessPtr();
                    const EmulationPageTable::Entry *pte =
                        p->pTable->lookup(vaddr);

                    if (!pte && mode != BaseTLB::Execute) {
                        // penalize a "page fault" more
                        latency += missLatency2;

                        if (p->fixupStackFault(vaddr))
                            pte = p->pTable->lookup(vaddr);
                    }

                    if (!pte) {
                        return std::make_shared<PageFault>(vaddr, true,
                                                           mode, true,
                                                           false);
                    } else {
                        Addr alignedVaddr = p->pTable->pageAlign(vaddr);

                        DPRINTF(GPUTLB, "Mapping %#x to %#x\n",
                                alignedVaddr, pte->paddr);

                        TlbEntry gpuEntry(p->pid(), alignedVaddr,
                                          pte->paddr, false, false);
                        entry = insert(alignedVaddr, gpuEntry);
                    }

                    DPRINTF(GPUTLB, "Miss was serviced.\n");
                }
            } else {
                latency = hitLatency;
            }
            // Do paging protection checks.
            bool inUser = (m5Reg.cpl == 3 &&
                           !(flags & (CPL0FlagBit << FlagShift)));

            CR0 cr0 = tc->readMiscRegNoEffect(MISCREG_CR0);
            bool badWrite = (!entry->writable && (inUser || cr0.wp));

            if ((inUser && !entry->user) || (mode == BaseTLB::Write &&
                badWrite)) {
                // The page must have been present to get into the TLB in
                // the first place. We'll assume the reserved bits are
                // fine even though we're not checking them.
                return std::make_shared<PageFault>(vaddr, true, mode,
                                                   inUser, false);
            }

            if (storeCheck && badWrite) {
                // This would fault if this were a write, so return a page
                // fault that reflects that happening.
                return std::make_shared<PageFault>(vaddr, true,
                                                   BaseTLB::Write, inUser,
                                                   false);
            }

            DPRINTF(GPUTLB, "Entry found with paddr %#x, doing protection "
                    "checks.\n", entry->paddr);

            int page_size = entry->size();
            Addr paddr = entry->paddr | (vaddr & (page_size - 1));
            DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);
            req->setPaddr(paddr);

            if (entry->uncacheable)
                req->setFlags(Request::UNCACHEABLE);
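            // Example: for a 4KB page the low 12 bits of vaddr are the page
            // offset, so paddr = entry->paddr | (vaddr & 0xfff).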
        } else {
            //Use the address which already has segmentation applied.
            DPRINTF(GPUTLB, "Paging disabled.\n");
            DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, vaddr);
            req->setPaddr(vaddr);
        }
    } else {
        DPRINTF(GPUTLB, "In real mode.\n");
        DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, vaddr);
        req->setPaddr(vaddr);
    }
    // Check for an access to the local APIC
    if (FullSystem) {
        LocalApicBase localApicBase =
            tc->readMiscRegNoEffect(MISCREG_APIC_BASE);

        Addr baseAddr = localApicBase.base * PageBytes;
        Addr paddr = req->getPaddr();

        if (baseAddr <= paddr && baseAddr + PageBytes > paddr) {
            // Force the access to be uncacheable.
            req->setFlags(Request::UNCACHEABLE);
            req->setPaddr(x86LocalAPICAddress(tc->contextId(),
                                              paddr - baseAddr));
        }
    }

    return NoFault;
}
Fault
GpuTLB::translateAtomic(const RequestPtr &req, ThreadContext *tc,
                        Mode mode, int &latency)
{
    bool delayedResponse;

    return GpuTLB::translate(req, tc, nullptr, mode, delayedResponse, false,
                             latency);
}
&req
, ThreadContext
*tc
,
921 Translation
*translation
, Mode mode
, int &latency
)
923 bool delayedResponse
;
926 Fault fault
= GpuTLB::translate(req
, tc
, translation
, mode
,
927 delayedResponse
, true, latency
);
929 if (!delayedResponse
)
930 translation
->finish(fault
, req
, tc
, mode
);
void
GpuTLB::serialize(CheckpointOut &cp) const
{
}

void
GpuTLB::unserialize(CheckpointIn &cp)
{
}
void
GpuTLB::regStats()
{
    ClockedObject::regStats();

    localNumTLBAccesses
        .name(name() + ".local_TLB_accesses")
        .desc("Number of TLB accesses")
        ;

    localNumTLBHits
        .name(name() + ".local_TLB_hits")
        .desc("Number of TLB hits")
        ;

    localNumTLBMisses
        .name(name() + ".local_TLB_misses")
        .desc("Number of TLB misses")
        ;

    localTLBMissRate
        .name(name() + ".local_TLB_miss_rate")
        .desc("TLB miss rate")
        ;

    accessCycles
        .name(name() + ".access_cycles")
        .desc("Cycles spent accessing this TLB level")
        ;

    pageTableCycles
        .name(name() + ".page_table_cycles")
        .desc("Cycles spent accessing the page table")
        ;

    localTLBMissRate = 100 * localNumTLBMisses / localNumTLBAccesses;

    numUniquePages
        .name(name() + ".unique_pages")
        .desc("Number of unique pages touched")
        ;

    localCycles
        .name(name() + ".local_cycles")
        .desc("Number of cycles spent in queue for all incoming reqs")
        ;

    localLatency
        .name(name() + ".local_latency")
        .desc("Avg. latency over incoming coalesced reqs")
        ;

    localLatency = localCycles / localNumTLBAccesses;

    globalNumTLBAccesses
        .name(name() + ".global_TLB_accesses")
        .desc("Number of TLB accesses")
        ;

    globalNumTLBHits
        .name(name() + ".global_TLB_hits")
        .desc("Number of TLB hits")
        ;

    globalNumTLBMisses
        .name(name() + ".global_TLB_misses")
        .desc("Number of TLB misses")
        ;

    globalTLBMissRate
        .name(name() + ".global_TLB_miss_rate")
        .desc("TLB miss rate")
        ;

    globalTLBMissRate = 100 * globalNumTLBMisses / globalNumTLBAccesses;

    avgReuseDistance
        .name(name() + ".avg_reuse_distance")
        .desc("avg. reuse distance over all pages (in ticks)")
        ;
}
/**
 * Do the TLB lookup for this coalesced request and schedule
 * another event <TLB access latency> cycles later.
 */
void
GpuTLB::issueTLBLookup(PacketPtr pkt)
{
    assert(pkt->senderState);

    Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
                                    TheISA::PageBytes);

    TranslationState *sender_state =
        safe_cast<TranslationState*>(pkt->senderState);

    bool update_stats = !sender_state->prefetch;
    ThreadContext * tmp_tc = sender_state->tc;

    DPRINTF(GPUTLB, "Translation req. for virt. page addr %#x\n",
            virt_page_addr);

    int req_cnt = sender_state->reqCnt.back();

    if (update_stats) {
        accessCycles -= (curTick() * req_cnt);
        localCycles -= curTick();
        updatePageFootprint(virt_page_addr);
        globalNumTLBAccesses += req_cnt;
    }

    tlbOutcome lookup_outcome = TLB_MISS;
    const RequestPtr &tmp_req = pkt->req;

    // Access the TLB and figure out if it's a hit or a miss.
    bool success = tlbLookup(tmp_req, tmp_tc, update_stats);

    if (success) {
        lookup_outcome = TLB_HIT;
        // Put the entry in SenderState
        TlbEntry *entry = lookup(tmp_req->getVaddr(), false);

        auto p = sender_state->tc->getProcessPtr();
        sender_state->tlbEntry =
            new TlbEntry(p->pid(), entry->vaddr, entry->paddr,
                         false, false);

        if (update_stats) {
            // the reqCnt has an entry per level, so its size tells us
            // which level we are in
            sender_state->hitLevel = sender_state->reqCnt.size();
            globalNumTLBHits += req_cnt;
        }
    } else {
        if (update_stats)
            globalNumTLBMisses += req_cnt;
    }

    /*
     * We now know the TLB lookup outcome (if it's a hit or a miss), as well
     * as the TLB access latency.
     *
     * We create and schedule a new TLBEvent which will help us take the
     * appropriate actions (e.g., update TLB on a hit, send request to lower
     * level TLB on a miss, or start a page walk if this was the last-level
     * TLB).
     */
    TLBEvent *tlb_event =
        new TLBEvent(this, virt_page_addr, lookup_outcome, pkt);

    if (translationReturnEvent.count(virt_page_addr)) {
        panic("Virtual Page Address %#x already has a return event\n",
              virt_page_addr);
    }

    translationReturnEvent[virt_page_addr] = tlb_event;

    DPRINTF(GPUTLB, "schedule translationReturnEvent @ curTick %d\n",
            curTick() + this->ticks(hitLatency));

    schedule(tlb_event, curTick() + this->ticks(hitLatency));
}
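// Note: issueTLBLookup() does not act on the outcome directly; the TLBEvent
// scheduled above fires hitLatency TLB cycles later (at curTick() +
// ticks(hitLatency)) and takes the hit/miss actions in translationReturn().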
GpuTLB::TLBEvent::TLBEvent(GpuTLB* _tlb, Addr _addr, tlbOutcome tlb_outcome,
                           PacketPtr _pkt)
    : Event(CPU_Tick_Pri), tlb(_tlb), virtPageAddr(_addr),
      outcome(tlb_outcome), pkt(_pkt)
{
}
/**
 * Do Paging protection checks. If we encounter a page fault, then
 * an assertion is fired.
 */
void
GpuTLB::pagingProtectionChecks(ThreadContext *tc, PacketPtr pkt,
                               TlbEntry * tlb_entry, Mode mode)
{
    HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
    uint32_t flags = pkt->req->getFlags();
    bool storeCheck = flags & (StoreCheck << FlagShift);

    // Do paging protection checks.
    bool inUser = (m5Reg.cpl == 3 && !(flags & (CPL0FlagBit << FlagShift)));
    CR0 cr0 = tc->readMiscRegNoEffect(MISCREG_CR0);

    bool badWrite = (!tlb_entry->writable && (inUser || cr0.wp));

    if ((inUser && !tlb_entry->user) ||
        (mode == BaseTLB::Write && badWrite)) {
        // The page must have been present to get into the TLB in
        // the first place. We'll assume the reserved bits are
        // fine even though we're not checking them.
        panic("Page fault detected");
    }

    if (storeCheck && badWrite) {
        // This would fault if this were a write, so return a page
        // fault that reflects that happening.
        panic("Page fault detected");
    }
}
/**
 * handleTranslationReturn is called on a TLB hit,
 * when a TLB miss returns or when a page fault returns.
 * The latter calls this function with TLB_MISS as the tlbOutcome.
 */
void
GpuTLB::handleTranslationReturn(Addr virt_page_addr, tlbOutcome tlb_outcome,
                                PacketPtr pkt)
{
    Addr vaddr = pkt->req->getVaddr();

    TranslationState *sender_state =
        safe_cast<TranslationState*>(pkt->senderState);

    ThreadContext *tc = sender_state->tc;
    Mode mode = sender_state->tlbMode;

    TlbEntry *local_entry, *new_entry;

    if (tlb_outcome == TLB_HIT) {
        DPRINTF(GPUTLB, "Translation Done - TLB Hit for addr %#x\n", vaddr);
        local_entry = sender_state->tlbEntry;
    } else {
        DPRINTF(GPUTLB, "Translation Done - TLB Miss for addr %#x\n",
                vaddr);

        // We are returning either from a page walk or from a hit at a lower
        // TLB level. The senderState should be "carrying" a pointer to the
        // correct TLBEntry.
        new_entry = sender_state->tlbEntry;
        local_entry = new_entry;

        if (allocationPolicy) {
            DPRINTF(GPUTLB, "allocating entry w/ addr %#x\n",
                    virt_page_addr);

            local_entry = insert(virt_page_addr, *new_entry);
        }

        assert(local_entry);
    }

    /**
     * At this point the packet carries an up-to-date tlbEntry pointer
     * in its senderState.
     * Next step is to do the paging protection checks.
     */
    DPRINTF(GPUTLB, "Entry found with vaddr %#x, doing protection checks "
            "while paddr was %#x.\n", local_entry->vaddr,
            local_entry->paddr);

    pagingProtectionChecks(tc, pkt, local_entry, mode);
    int page_size = local_entry->size();
    Addr paddr = local_entry->paddr | (vaddr & (page_size - 1));
    DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);

    // Since this packet will be sent through the cpu side slave port,
    // it must be converted to a response pkt if it is not one already
    if (pkt->isRequest()) {
        pkt->makeTimingResponse();
    }

    pkt->req->setPaddr(paddr);

    if (local_entry->uncacheable) {
        pkt->req->setFlags(Request::UNCACHEABLE);
    }

    //send packet back to coalescer
    cpuSidePort[0]->sendTimingResp(pkt);
    //schedule cleanup event
    cleanupQueue.push(virt_page_addr);

    // schedule this only once per cycle.
    // The check is required because we might have multiple translations
    // returning the same cycle
    // this is a maximum priority event and must be on the same cycle
    // as the cleanup event in TLBCoalescer to avoid a race with
    // IssueProbeEvent caused by TLBCoalescer::MemSidePort::recvReqRetry
    if (!cleanupEvent.scheduled())
        schedule(cleanupEvent, curTick());
}
/**
 * Here we take the appropriate actions based on the result of the
 * TLB lookup.
 */
void
GpuTLB::translationReturn(Addr virtPageAddr, tlbOutcome outcome,
                          PacketPtr pkt)
{
    DPRINTF(GPUTLB, "Triggered TLBEvent for addr %#x\n", virtPageAddr);

    assert(translationReturnEvent[virtPageAddr]);

    TranslationState *tmp_sender_state =
        safe_cast<TranslationState*>(pkt->senderState);

    int req_cnt = tmp_sender_state->reqCnt.back();
    bool update_stats = !tmp_sender_state->prefetch;

    if (outcome == TLB_HIT) {
        handleTranslationReturn(virtPageAddr, TLB_HIT, pkt);

        if (update_stats) {
            accessCycles += (req_cnt * curTick());
            localCycles += curTick();
        }
    } else if (outcome == TLB_MISS) {
        DPRINTF(GPUTLB, "This is a TLB miss\n");

        if (update_stats) {
            accessCycles += (req_cnt*curTick());
            localCycles += curTick();
        }

        if (hasMemSidePort) {
            // the one cycle added here represents the delay from when we
            // get the reply back till when we propagate it to the coalescer
            if (update_stats)
                accessCycles += (req_cnt * 1);

            /**
             * There is a TLB below. Send the coalesced request.
             * We actually send the very first packet of all the
             * pending packets for this virtual page address.
             */
            if (!memSidePort[0]->sendTimingReq(pkt)) {
                DPRINTF(GPUTLB, "Failed sending translation request to "
                        "lower level TLB for addr %#x\n", virtPageAddr);

                memSidePort[0]->retries.push_back(pkt);
            } else {
                DPRINTF(GPUTLB, "Sent translation request to lower level "
                        "TLB for addr %#x\n", virtPageAddr);
            }
        } else {
            //this is the last level TLB. Start a page walk
            DPRINTF(GPUTLB, "Last level TLB - start a page walk for "
                    "addr %#x\n", virtPageAddr);

            if (update_stats)
                pageTableCycles -= (req_cnt*curTick());

            TLBEvent *tlb_event = translationReturnEvent[virtPageAddr];
            assert(tlb_event);
            tlb_event->updateOutcome(PAGE_WALK);
            schedule(tlb_event, curTick() + ticks(missLatency2));
        }
    } else if (outcome == PAGE_WALK) {
        if (update_stats)
            pageTableCycles += (req_cnt*curTick());

        // Need to access the page table and update the TLB
        DPRINTF(GPUTLB, "Doing a page walk for address %#x\n",
                virtPageAddr);

        TranslationState *sender_state =
            safe_cast<TranslationState*>(pkt->senderState);

        Process *p = sender_state->tc->getProcessPtr();
        Addr vaddr = pkt->req->getVaddr();

        Addr alignedVaddr = p->pTable->pageAlign(vaddr);
        assert(alignedVaddr == virtPageAddr);

        const EmulationPageTable::Entry *pte = p->pTable->lookup(vaddr);
        if (!pte && sender_state->tlbMode != BaseTLB::Execute &&
            p->fixupStackFault(vaddr)) {
            pte = p->pTable->lookup(vaddr);
        }

        if (pte) {
            DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
                    pte->paddr);

            sender_state->tlbEntry =
                new TlbEntry(p->pid(), virtPageAddr, pte->paddr, false,
                             false);
        } else {
            sender_state->tlbEntry = nullptr;
        }

        handleTranslationReturn(virtPageAddr, TLB_MISS, pkt);
    } else if (outcome == MISS_RETURN) {
        /** we add an extra cycle in the return path of the translation
         * requests in between the various TLB levels.
         */
        handleTranslationReturn(virtPageAddr, TLB_MISS, pkt);
    } else {
        panic("Unexpected TLB outcome %d", outcome);
    }
}
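// Outcome flow handled above: TLB_HIT completes the translation right away;
// TLB_MISS either forwards the packet to the lower-level TLB or, at the last
// level, reschedules the event as PAGE_WALK after missLatency2 cycles;
// PAGE_WALK consults the page table and finishes via
// handleTranslationReturn(); MISS_RETURN models the extra cycle on the way
// back up the hierarchy.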
void
GpuTLB::TLBEvent::process()
{
    tlb->translationReturn(virtPageAddr, outcome, pkt);
}

const char*
GpuTLB::TLBEvent::description() const
{
    return "trigger translationDoneEvent";
}

void
GpuTLB::TLBEvent::updateOutcome(tlbOutcome _outcome)
{
    outcome = _outcome;
}

Addr
GpuTLB::TLBEvent::getTLBEventVaddr()
{
    return virtPageAddr;
}
/**
 * recvTiming receives a coalesced timing request from a TLBCoalescer
 * and it calls issueTLBLookup()
 * It only rejects the packet if we have exceeded the max
 * outstanding number of requests for the TLB
 */
bool
GpuTLB::CpuSidePort::recvTimingReq(PacketPtr pkt)
{
    if (tlb->outstandingReqs < tlb->maxCoalescedReqs) {
        tlb->issueTLBLookup(pkt);
        // update number of outstanding translation requests
        tlb->outstandingReqs++;
        return true;
    } else {
        DPRINTF(GPUTLB, "Reached maxCoalescedReqs number %d\n",
                tlb->outstandingReqs);
        return false;
    }
}
/**
 * handleFuncTranslationReturn is called on a TLB hit,
 * when a TLB miss returns or when a page fault returns.
 * It updates LRU, inserts the TLB entry on a miss
 * depending on the allocation policy and does the required
 * protection checks. It does NOT create a new packet to
 * update the packet's addr; this is done in hsail-gpu code.
 */
void
GpuTLB::handleFuncTranslationReturn(PacketPtr pkt, tlbOutcome tlb_outcome)
{
    TranslationState *sender_state =
        safe_cast<TranslationState*>(pkt->senderState);

    ThreadContext *tc = sender_state->tc;
    Mode mode = sender_state->tlbMode;
    Addr vaddr = pkt->req->getVaddr();

    TlbEntry *local_entry, *new_entry;

    if (tlb_outcome == TLB_HIT) {
        DPRINTF(GPUTLB, "Functional Translation Done - TLB hit for addr "
                "%#x\n", vaddr);

        local_entry = sender_state->tlbEntry;
    } else {
        DPRINTF(GPUTLB, "Functional Translation Done - TLB miss for addr "
                "%#x\n", vaddr);

        // We are returning either from a page walk or from a hit at a lower
        // TLB level. The senderState should be "carrying" a pointer to the
        // correct TLBEntry.
        new_entry = sender_state->tlbEntry;
        local_entry = new_entry;

        if (allocationPolicy) {
            Addr virt_page_addr = roundDown(vaddr, TheISA::PageBytes);

            DPRINTF(GPUTLB, "allocating entry w/ addr %#x\n",
                    virt_page_addr);

            local_entry = insert(virt_page_addr, *new_entry);
        }

        assert(local_entry);
    }

    DPRINTF(GPUTLB, "Entry found with vaddr %#x, doing protection checks "
            "while paddr was %#x.\n", local_entry->vaddr,
            local_entry->paddr);

    /**
     * Do paging checks if it's a normal functional access. If it's for a
     * prefetch, then sometimes you can try to prefetch something that
     * won't pass protection. We don't actually want to fault because there
     * is no demand access to deem this a violation. Just put it in the
     * TLB and it will fault if indeed a future demand access touches it in
     * any way.
     *
     * This feature could be used to explore security issues around
     * speculative memory accesses.
     */
    if (!sender_state->prefetch && sender_state->tlbEntry)
        pagingProtectionChecks(tc, pkt, local_entry, mode);

    int page_size = local_entry->size();
    Addr paddr = local_entry->paddr | (vaddr & (page_size - 1));
    DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);

    pkt->req->setPaddr(paddr);

    if (local_entry->uncacheable)
        pkt->req->setFlags(Request::UNCACHEABLE);
}
// This is used for atomic translations. Need to
// make it all happen during the same cycle.
void
GpuTLB::CpuSidePort::recvFunctional(PacketPtr pkt)
{
    TranslationState *sender_state =
        safe_cast<TranslationState*>(pkt->senderState);

    ThreadContext *tc = sender_state->tc;
    bool update_stats = !sender_state->prefetch;

    Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
                                    TheISA::PageBytes);

    if (update_stats)
        tlb->updatePageFootprint(virt_page_addr);

    // do the TLB lookup without updating the stats
    bool success = tlb->tlbLookup(pkt->req, tc, update_stats);
    tlbOutcome tlb_outcome = success ? TLB_HIT : TLB_MISS;

    // functional mode means no coalescing
    // global metrics are the same as the local metrics
    if (update_stats) {
        tlb->globalNumTLBAccesses++;

        if (success) {
            sender_state->hitLevel = sender_state->reqCnt.size();
            tlb->globalNumTLBHits++;
        }
    }

    if (!success) {
        if (update_stats)
            tlb->globalNumTLBMisses++;

        if (tlb->hasMemSidePort) {
            // there is a TLB below -> propagate down the TLB hierarchy
            tlb->memSidePort[0]->sendFunctional(pkt);
            // If no valid translation from a prefetch, then just return
            if (sender_state->prefetch && !pkt->req->hasPaddr())
                return;
        } else {
            // Need to access the page table and update the TLB
            DPRINTF(GPUTLB, "Doing a page walk for address %#x\n",
                    virt_page_addr);

            Process *p = tc->getProcessPtr();

            Addr vaddr = pkt->req->getVaddr();

            Addr alignedVaddr = p->pTable->pageAlign(vaddr);
            assert(alignedVaddr == virt_page_addr);

            const EmulationPageTable::Entry *pte =
                p->pTable->lookup(vaddr);
            if (!pte && sender_state->tlbMode != BaseTLB::Execute &&
                p->fixupStackFault(vaddr)) {
                pte = p->pTable->lookup(vaddr);
            }

            if (!sender_state->prefetch) {
                // no PageFaults are permitted after
                // the second page table lookup
                DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
                        pte->paddr);

                sender_state->tlbEntry =
                    new TlbEntry(p->pid(), virt_page_addr,
                                 pte->paddr, false, false);
            } else {
                // If this was a prefetch, then do the normal thing if it
                // was a successful translation. Otherwise, send an empty
                // TLB entry back so that it can be figured out as empty and
                // handled accordingly.
                if (pte) {
                    DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
                            pte->paddr);

                    sender_state->tlbEntry =
                        new TlbEntry(p->pid(), virt_page_addr,
                                     pte->paddr, false, false);
                } else {
                    DPRINTF(GPUPrefetch, "Prefetch failed %#x\n",
                            alignedVaddr);

                    sender_state->tlbEntry = nullptr;

                    return;
                }
            }
        }
    } else {
        DPRINTF(GPUPrefetch, "Functional Hit for vaddr %#x\n",
                tlb->lookup(pkt->req->getVaddr()));

        TlbEntry *entry = tlb->lookup(pkt->req->getVaddr(),
                                      update_stats);

        auto p = sender_state->tc->getProcessPtr();
        sender_state->tlbEntry =
            new TlbEntry(p->pid(), entry->vaddr, entry->paddr,
                         false, false);
    }

    // This is the function that would populate pkt->req with the paddr of
    // the translation. But if no translation happens (i.e., prefetch fails)
    // then the early returns in the above code will keep this function
    // from executing.
    tlb->handleFuncTranslationReturn(pkt, tlb_outcome);
}
void
GpuTLB::CpuSidePort::recvReqRetry()
{
    // The CPUSidePort never sends anything but replies. No retries
    // expected.
    panic("recvReqRetry called");
}

AddrRangeList
GpuTLB::CpuSidePort::getAddrRanges() const
{
    // currently not checked by the master
    AddrRangeList ranges;

    return ranges;
}
/**
 * MemSidePort receives the packet back.
 * We need to call the handleTranslationReturn
 * and propagate up the hierarchy.
 */
bool
GpuTLB::MemSidePort::recvTimingResp(PacketPtr pkt)
{
    Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
                                    TheISA::PageBytes);

    DPRINTF(GPUTLB, "MemSidePort recvTiming for virt_page_addr %#x\n",
            virt_page_addr);

    TLBEvent *tlb_event = tlb->translationReturnEvent[virt_page_addr];
    assert(virt_page_addr == tlb_event->getTLBEventVaddr());

    tlb_event->updateOutcome(MISS_RETURN);
    tlb->schedule(tlb_event, curTick()+tlb->ticks(1));

    return true;
}
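// Note: the reply is not forwarded immediately; the existing TLBEvent is
// re-scheduled one TLB cycle later with the MISS_RETURN outcome, modelling
// the hop back up to this level before the packet reaches the coalescer.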
void
GpuTLB::MemSidePort::recvReqRetry()
{
    // No retries should reach the TLB. The retries
    // should only reach the TLBCoalescer.
    panic("recvReqRetry called");
}
void
GpuTLB::cleanup()
{
    while (!cleanupQueue.empty()) {
        Addr cleanup_addr = cleanupQueue.front();
        cleanupQueue.pop();

        TLBEvent * old_tlb_event = translationReturnEvent[cleanup_addr];
        delete old_tlb_event;
        translationReturnEvent.erase(cleanup_addr);

        // update number of outstanding requests
        outstandingReqs--;
    }

    /** the higher level coalescer should retry if it has
     * any pending requests.
     */
    for (int i = 0; i < cpuSidePort.size(); ++i) {
        cpuSidePort[i]->sendRetryReq();
    }
}
void
GpuTLB::updatePageFootprint(Addr virt_page_addr)
{
    std::pair<AccessPatternTable::iterator, bool> ret;

    AccessInfo tmp_access_info;
    tmp_access_info.lastTimeAccessed = 0;
    tmp_access_info.accessesPerPage = 0;
    tmp_access_info.totalReuseDistance = 0;
    tmp_access_info.sumDistance = 0;
    tmp_access_info.meanDistance = 0;

    ret = TLBFootprint.insert(AccessPatternTable::value_type(virt_page_addr,
                              tmp_access_info));

    bool first_page_access = ret.second;

    if (first_page_access) {
        numUniquePages++;
    } else {
        int accessed_before;
        accessed_before = curTick() - ret.first->second.lastTimeAccessed;
        ret.first->second.totalReuseDistance += accessed_before;
    }

    ret.first->second.accessesPerPage++;
    ret.first->second.lastTimeAccessed = curTick();

    if (accessDistance) {
        ret.first->second.localTLBAccesses
            .push_back(localNumTLBAccesses.value());
    }
}
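// Example of the bookkeeping above: if a page was last touched at tick 1000
// and is touched again at tick 1600, totalReuseDistance grows by 600;
// exitCallback() later reports totalReuseDistance / accessesPerPage as the
// per-page average reuse distance.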
void
GpuTLB::exitCallback()
{
    std::ostream *page_stat_file = nullptr;

    if (accessDistance) {
        // print per page statistics to a separate file (.csv format)
        // simout is the gem5 output directory (default is m5out or the one
        // specified with -d)
        page_stat_file = simout.create(name().c_str())->stream();

        *page_stat_file << "page,max_access_distance,mean_access_distance, "
                        << "stddev_distance" << std::endl;
    }

    // update avg. reuse distance footprint
    AccessPatternTable::iterator iter, iter_begin, iter_end;
    unsigned int sum_avg_reuse_distance_per_page = 0;

    // iterate through all pages seen by this TLB
    for (iter = TLBFootprint.begin(); iter != TLBFootprint.end(); iter++) {
        sum_avg_reuse_distance_per_page += iter->second.totalReuseDistance /
                                           iter->second.accessesPerPage;

        if (accessDistance) {
            unsigned int tmp = iter->second.localTLBAccesses[0];
            unsigned int prev = tmp;

            for (int i = 0; i < iter->second.localTLBAccesses.size(); ++i) {
                prev = iter->second.localTLBAccesses[i];
                // update the localTLBAccesses value
                // with the actual difference
                iter->second.localTLBAccesses[i] -= tmp;
                // compute the sum of AccessDistance per page
                // used later for mean
                iter->second.sumDistance +=
                    iter->second.localTLBAccesses[i];
            }

            iter->second.meanDistance =
                iter->second.sumDistance / iter->second.accessesPerPage;

            // compute std_dev and max (we need a second round because we
            // need to know the mean value)
            unsigned int max_distance = 0;
            unsigned int stddev_distance = 0;

            for (int i = 0; i < iter->second.localTLBAccesses.size(); ++i) {
                unsigned int tmp_access_distance =
                    iter->second.localTLBAccesses[i];

                if (tmp_access_distance > max_distance) {
                    max_distance = tmp_access_distance;
                }

                unsigned int diff =
                    tmp_access_distance - iter->second.meanDistance;
                stddev_distance += pow(diff, 2);
            }

            stddev_distance =
                sqrt(stddev_distance/iter->second.accessesPerPage);
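            // Note: stddev_distance accumulates squared deviations from the
            // per-page mean and is then reduced with
            // sqrt(sum / accessesPerPage), i.e. a population standard
            // deviation over the recorded access distances.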
            if (page_stat_file) {
                *page_stat_file << std::hex << iter->first << ",";
                *page_stat_file << std::dec << max_distance << ",";
                *page_stat_file << std::dec << iter->second.meanDistance
                                << ",";
                *page_stat_file << std::dec << stddev_distance;
                *page_stat_file << std::endl;
            }

            // erase the localTLBAccesses array
            iter->second.localTLBAccesses.clear();
        }
    }

    if (!TLBFootprint.empty()) {
        avgReuseDistance =
            sum_avg_reuse_distance_per_page / TLBFootprint.size();
    }

    //clear the TLBFootprint map
    TLBFootprint.clear();
}
} // namespace X86ISA

X86ISA::GpuTLB*
X86GPUTLBParams::create()
{
    return new X86ISA::GpuTLB(this);
}