2 * Copyright (c) 2014-2015 ARM Limited
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder. You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions are
16 * met: redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer;
18 * redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution;
21 * neither the name of the copyright holders nor the names of its
22 * contributors may be used to endorse or promote products derived from
23 * this software without specific prior written permission.
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 * Authors: Mitch Hayenga
40 #include "mem/cache/prefetch/queued.hh"
44 #include "arch/generic/tlb.hh"
45 #include "base/logging.hh"
46 #include "base/trace.hh"
47 #include "debug/HWPrefetch.hh"
48 #include "mem/cache/base.hh"
49 #include "mem/request.hh"
50 #include "params/QueuedPrefetcher.hh"
// Builds the prefetch packet held by this deferred entry: a Request is
// constructed from (paddr, blk_size, 0, mid) and wrapped in a
// MemCmd::HardPFReq Packet that is stored in pkt.
// NOTE(review): parts of this definition (return type, the parameter that
// follows tag_prefetch, braces and any trailing statements) are not visible
// in this listing -- check the complete file before changing it.
53 QueuedPrefetcher::DeferredPacket::createPkt(Addr paddr
, unsigned blk_size
,
54 MasterID mid
, bool tag_prefetch
,
56 /* Create a prefetch memory request */
57 RequestPtr req
= std::make_shared
<Request
>(paddr
, blk_size
, 0, mid
);
// Carry the secure attribute of the originating prefetch info over to the
// new request.
59 if (pfInfo
.isSecure()) {
60 req
->setFlags(Request::SECURE
);
// Mark the request as belonging to the prefetcher task.
62 req
->taskId(ContextSwitchTaskId::Prefetcher
);
// The packet is allocated with a raw new; the matching release is not in
// this function.
63 pkt
= new Packet(req
, MemCmd::HardPFReq
);
// The PC is attached only when tagging was requested and the prefetch info
// actually carries one.
65 if (tag_prefetch
&& pfInfo
.hasPC()) {
66 // Tag prefetch packet with accessing pc
67 pkt
->req
->setPC(pfInfo
.getPC());
// Starts a timing translation of translationRequest on the given TLB,
// passing this object as the translation callback. Does nothing if a
// translation for this entry is already in flight.
// tlb: TLB used for the translation; it is dereferenced without a null check.
73 QueuedPrefetcher::DeferredPacket::startTranslation(BaseTLB
*tlb
)
// A translation request must have been attached before calling this.
75 assert(translationRequest
!= nullptr);
76 if (!ongoingTranslation
) {
// Set the flag before calling into the TLB so that finish() sees it set.
77 ongoingTranslation
= true;
78 // Prefetchers only operate in Timing mode
79 tlb
->translateTiming(translationRequest
, tc
, this, BaseTLB::Read
);
// Translation callback: clears the in-flight flag and reports the outcome
// to the owning prefetcher through translationComplete().
// fault: result of the translation; anything other than NoFault counts as
//        a failure.
// req, tc, mode: not used by the statements visible in this listing.
84 QueuedPrefetcher::DeferredPacket::finish(const Fault
&fault
,
85 const RequestPtr
&req
, ThreadContext
*tc
, BaseTLB::Mode mode
)
// finish() is only expected after startTranslation() set the flag.
87 assert(ongoingTranslation
);
88 ongoingTranslation
= false;
89 bool failed
= (fault
!= NoFault
);
// The owner decides what happens to this entry (see translationComplete).
90 owner
->translationComplete(this, failed
);
// Constructor: forwards p to BasePrefetcher and copies the queue-related
// configuration out of the parameter object into members.
// p: parameter struct; each member below is initialised from the field of
//    the same meaning (queue_size, latency, queue_squash, queue_filter,
//    cache_snoop, tag_prefetch, throttle_control_percentage, ...).
93 QueuedPrefetcher::QueuedPrefetcher(const QueuedPrefetcherParams
*p
)
94 : BasePrefetcher(p
), queueSize(p
->queue_size
),
// Capacity limit taken from max_prefetch_requests_with_pending_translation.
95 missingTranslationQueueSize(
96 p
->max_prefetch_requests_with_pending_translation
),
97 latency(p
->latency
), queueSquash(p
->queue_squash
),
98 queueFilter(p
->queue_filter
), cacheSnoop(p
->cache_snoop
),
99 tagPrefetch(p
->tag_prefetch
),
100 throttleControlPct(p
->throttle_control_percentage
)
// Destructor: walks every entry of pfq.
// NOTE(review): the loop body is not visible in this listing; per the
// comment below it is expected to release the queued packets -- confirm.
104 QueuedPrefetcher::~QueuedPrefetcher()
106 // Delete the queued prefetch packets
107 for (DeferredPacket
&p
: pfq
) {
// Computes how many of total candidate addresses may be turned into
// prefetch requests, using throttleControlPct and the ratio
// usefulPrefetches / issuedPrefetches.
// total: number of candidates produced for the current access.
// NOTE(review): the return statement is not visible in this listing;
// max_pfs is presumably the returned value -- confirm.
113 QueuedPrefetcher::getMaxPermittedPrefetches(size_t total
) const
116 * Throttle generated prefetches based in the accuracy of the prefetcher.
117 * Accuracy is computed based in the ratio of useful prefetches with
118 * respect to the number of issued prefetches.
120 * The throttleControlPct controls how many of the candidate addresses
121 * generated by the prefetcher will be finally turned into prefetch
123 * - If set to 100, all candidates can be discarded (one request
124 * will always be allowed to be generated)
125 * - Setting it to 0 will disable the throttle control, so requests are
126 * created for all candidates
127 * - If set to 60, 40% of candidates will generate a request, and the
128 * remaining 60% will be generated depending on the current accuracy
// Default: every candidate is allowed. This value is kept when there are no
// candidates or when nothing has been issued yet (avoids dividing by zero).
131 size_t max_pfs
= total
;
132 if (total
> 0 && issuedPrefetches
> 0) {
// Number of candidates subject to throttling, stored as size_t.
133 size_t throttle_pfs
= (total
* throttleControlPct
) / 100;
// Candidates that are always allowed: total - throttle_pfs, but at least 1.
134 size_t min_pfs
= (total
- throttle_pfs
) == 0 ?
135 1 : (total
- throttle_pfs
);
// Add the accuracy-scaled share of the remaining candidates; the product is
// formed before the division.
136 max_pfs
= min_pfs
+ (total
- min_pfs
) *
137 usefulPrefetches
/ issuedPrefetches
;
// Handles an access described by pfi: erases entries of pfq that refer to
// the same block address and security state, asks calculatePrefetch() for
// candidate addresses, and hands acceptable candidates to insert().
// pkt: packet that triggered the notification; forwarded to insert().
// pfi: prefetch information for the triggering access.
// NOTE(review): several lines (conditions, counters, braces) are not visible
// in this listing; the comments below describe only the visible statements.
143 QueuedPrefetcher::notify(const PacketPtr
&pkt
, const PrefetchInfo
&pfi
)
145 Addr blk_addr
= blockAddress(pfi
.getAddr());
146 bool is_secure
= pfi
.isSecure();
148 // Squash queued prefetches if demand miss to same line
150 auto itr
= pfq
.begin();
151 while (itr
!= pfq
.end()) {
// An entry matches when both block address and security state are equal.
152 if (itr
->pfInfo
.getAddr() == blk_addr
&&
153 itr
->pfInfo
.isSecure() == is_secure
) {
// erase() returns the iterator following the removed entry, so the scan
// continues from there.
155 itr
= pfq
.erase(itr
);
162 // Calculate prefetches given this access
163 std::vector
<AddrPriority
> addresses
;
164 calculatePrefetch(pfi
, addresses
);
166 // Get the maximum number of prefetches that we are allowed to generate
167 size_t max_pfs
= getMaxPermittedPrefetches(addresses
.size());
169 // Queue up generated prefetches
171 for (AddrPriority
& addr_prio
: addresses
) {
173 // Block align prefetch address
174 addr_prio
.first
= blockAddress(addr_prio
.first
);
// Candidate lies on a different page than the triggering address (the body
// of this branch is not visible in this listing).
176 if (!samePage(addr_prio
.first
, pfi
.getAddr())) {
// A candidate on another page is only accepted when a TLB is available.
180 bool can_cross_page
= (tlb
!= nullptr);
181 if (can_cross_page
|| samePage(addr_prio
.first
, pfi
.getAddr())) {
// Derive the prefetch info for the candidate from pfi and the new address.
182 PrefetchInfo
new_pfi(pfi
,addr_prio
.first
);
184 DPRINTF(HWPrefetch
, "Found a pf candidate addr: %#x, "
185 "inserting into prefetch queue.\n", new_pfi
.getAddr());
186 // Create and insert the request
187 insert(pkt
, new_pfi
, addr_prio
.second
);
// Compare the running count num_pfs with the limit computed above (the
// update of num_pfs and the body of this branch are not visible here).
189 if (num_pfs
== max_pfs
) {
193 DPRINTF(HWPrefetch
, "Ignoring page crossing prefetch.\n");
// Provides the next prefetch packet: reads the packet stored in the entry at
// the front of pfq, counts it in issuedPrefetches and then tries to start
// pending translations for the space left in the queue.
// NOTE(review): the emptiness checks, the removal of the front entry and the
// return statements are not visible in this listing.
199 QueuedPrefetcher::getPacket()
201 DPRINTF(HWPrefetch
, "Requesting a prefetch to issue.\n");
204 // If the queue is empty, attempt first to fill it with requests
205 // from the queue of missing translations
206 processMissingTranslations(queueSize
);
210 DPRINTF(HWPrefetch
, "No hardware prefetches available.\n");
214 PacketPtr pkt
= pfq
.front().pkt
;
218 issuedPrefetches
+= 1;
// Entries in pfq are expected to already own a packet.
219 assert(pkt
!= nullptr);
220 DPRINTF(HWPrefetch
, "Generating prefetch for %#x.\n", pkt
->getAddr());
// Start at most as many translations as there are free slots in pfq.
222 processMissingTranslations(queueSize
- pfq
.size());
// Registers statistics: calls the base class implementation and then sets
// the name and description of five statistics (pfIdentified, pfBufferHit,
// pfInCache, pfRemovedFull, pfSpanPage).
// NOTE(review): the statistic objects the .name()/.desc() chains apply to
// are on lines that are not visible in this listing.
227 QueuedPrefetcher::regStats()
229 BasePrefetcher::regStats();
232 .name(name() + ".pfIdentified")
233 .desc("number of prefetch candidates identified");
236 .name(name() + ".pfBufferHit")
237 .desc("number of redundant prefetches already in prefetch queue");
240 .name(name() + ".pfInCache")
241 .desc("number of redundant prefetches already in cache/mshr dropped");
244 .name(name() + ".pfRemovedFull")
245 .desc("number of prefetches dropped due to prefetch queue size");
248 .name(name() + ".pfSpanPage")
249 .desc("number of prefetches that crossed the page");
// Walks pfqMissingTranslation from the front and calls startTranslation()
// on its entries while the running count stays below max.
// max: upper bound compared against count in the loop condition.
// NOTE(review): the declaration and update of count and the iterator
// increment are on lines not visible in this listing.
254 QueuedPrefetcher::processMissingTranslations(unsigned max
)
257 iterator it
= pfqMissingTranslation
.begin();
258 while (it
!= pfqMissingTranslation
.end() && count
< max
) {
// Take the reference before the iterator is advanced.
259 DeferredPacket
&dp
= *it
;
260 // Increase the iterator first because dp.startTranslation can end up
261 // calling translationComplete (via DeferredPacket::finish), which erases
// the entry from pfqMissingTranslation.
263 dp
.startTranslation(tlb
);
// Called when the translation of a deferred packet has finished. The entry
// is located in pfqMissingTranslation; on success a packet is created for
// the translated physical address and the entry is added to pfq unless the
// cache snoop finds it redundant. In all visible paths the entry is finally
// erased from pfqMissingTranslation.
// dp:     deferred packet whose translation completed.
// failed: true when the translation ended with a fault.
// NOTE(review): the body of the search loop and the branch structure
// (success / failure, redundant / not redundant) are on lines not visible in
// this listing.
269 QueuedPrefetcher::translationComplete(DeferredPacket
*dp
, bool failed
)
271 auto it
= pfqMissingTranslation
.begin();
272 while (it
!= pfqMissingTranslation
.end()) {
// The entry must have been found in the queue of pending translations.
278 assert(it
!= pfqMissingTranslation
.end());
280 DPRINTF(HWPrefetch
, "%s Translation of vaddr %#x succeeded: "
281 "paddr %#x \n", tlb
->name(),
282 it
->translationRequest
->getVaddr(),
283 it
->translationRequest
->getPaddr());
284 Addr target_paddr
= it
->translationRequest
->getPaddr();
285 // check if this prefetch is already redundant
286 if (cacheSnoop
&& (inCache(target_paddr
, it
->pfInfo
.isSecure()) ||
287 inMissQueue(target_paddr
, it
->pfInfo
.isSecure()))) {
289 DPRINTF(HWPrefetch
, "Dropping redundant in "
290 "cache/MSHR prefetch addr:%#x\n", target_paddr
);
// Ready time of the prefetch: now plus latency clock periods.
292 Tick pf_time
= curTick() + clockPeriod() * latency
;
293 it
->createPkt(it
->translationRequest
->getPaddr(), blkSize
,
294 masterId
, tagPrefetch
, pf_time
);
// addToQueue receives the entry by reference to the list element.
295 addToQueue(pfq
, *it
);
// NOTE(review): the format string below contains three conversions (%s,
// %#x, %#x) but only two arguments are visible (tlb name and vaddr) --
// confirm and drop the last conversion or add the missing argument.
298 DPRINTF(HWPrefetch
, "%s Translation of vaddr %#x failed, dropping "
299 "prefetch request %#x \n", tlb
->name(),
300 it
->translationRequest
->getVaddr());
// The entry leaves the pending-translation queue; it is invalid afterwards.
302 pfqMissingTranslation
.erase(it
);
// Searches queue for an entry whose pfInfo has the same address as pfi. If
// one is found and its priority is lower than the given one, the priority
// is raised and the entry is swapped towards the head of the queue.
// queue:    list of deferred packets to search (pfq or
//           pfqMissingTranslation at the call sites in insert()).
// pfi:      prefetch information being looked up.
// priority: priority of the new candidate.
// NOTE(review): declarations of found, it and prev, the comparison guarding
// the swap and the return statement are on lines not visible in this
// listing.
306 QueuedPrefetcher::alreadyInQueue(std::list
<DeferredPacket
> &queue
,
307 const PrefetchInfo
&pfi
, int32_t priority
)
// NOTE(review): the increment expression runs once more after found becomes
// true, so on a match the loop appears to leave it one element past the
// matching entry (end() when the match is the last entry). The code below
// then works on that iterator -- confirm against the complete source and
// consider stopping at the matching element instead.
311 for (it
= queue
.begin(); it
!= queue
.end() && !found
; it
++) {
312 found
= it
->pfInfo
.sameAddr(pfi
);
315 /* If the address is already in the queue, update priority and leave */
316 if (it
!= queue
.end()) {
318 if (it
->priority
< priority
) {
319 /* Update priority value and position in the queue */
320 it
->priority
= priority
;
// Walk back towards the head of the queue.
322 while (prev
!= queue
.begin()) {
324 /* If the packet has higher priority, swap */
326 std::swap(*it
, *prev
);
330 DPRINTF(HWPrefetch
, "Prefetch addr already in "
331 "prefetch queue, priority updated\n");
333 DPRINTF(HWPrefetch
, "Prefetch addr already in "
// Creates the request used to translate a prefetch address. ASID, flags and
// context id are copied from the request of the triggering packet, the PC
// comes from pfi, and the PREFETCH flag is set on the result.
// addr: address placed in the new request.
// pfi:  prefetch information supplying the PC.
// Returns the newly created request.
// NOTE(review): the remaining parameter(s), including pkt used below, are on
// a line not visible in this listing.
341 QueuedPrefetcher::createPrefetchRequest(Addr addr
, PrefetchInfo
const &pfi
,
344 RequestPtr translation_req
= std::make_shared
<Request
>(pkt
->req
->getAsid(),
345 addr
, blkSize
, pkt
->req
->getFlags(), masterId
, pfi
.getPC(),
346 pkt
->req
->contextId());
// Mark the request as a prefetch in addition to the copied flags.
347 translation_req
->setFlags(Request::PREFETCH
);
348 return translation_req
;
// Queues a prefetch for new_pfi, derived from the access carried by pkt.
// Both pfq and pfqMissingTranslation are checked with alreadyInQueue()
// first. For a target on the same page the physical address is computed
// directly and the entry goes to pfq; for a page-crossing target a
// translation request is created and the entry goes to
// pfqMissingTranslation.
// pkt:     packet of the triggering access.
// new_pfi: prefetch information of the candidate.
// NOTE(review): the priority parameter line, the bodies of several branches
// and the early returns are on lines not visible in this listing.
352 QueuedPrefetcher::insert(const PacketPtr
&pkt
, PrefetchInfo
&new_pfi
,
356 if (alreadyInQueue(pfq
, new_pfi
, priority
)) {
359 if (alreadyInQueue(pfqMissingTranslation
, new_pfi
, priority
)) {
365 * Physical address computation
366 * if the prefetch is within the same page
367 * using VA: add the computed stride to the original PA
368 * using PA: no actions needed
369 * if we are page crossing
370 * using VA: Create a translaion request and enqueue the corresponding
371 * deferred packet to the queue of pending translations
372 * using PA: use the provided VA to obtain the target VA, then attempt to
373 * translate the resulting address
// Address of the triggering access in the address space used for training.
376 Addr orig_addr
= useVirtualAddresses
?
377 pkt
->req
->getVaddr() : pkt
->req
->getPaddr();
// The stride is kept as an unsigned magnitude plus a direction flag.
378 bool positive_stride
= new_pfi
.getAddr() >= orig_addr
;
379 Addr stride
= positive_stride
?
380 (new_pfi
.getAddr() - orig_addr
) : (orig_addr
- new_pfi
.getAddr());
383 bool has_target_pa
= false;
384 RequestPtr translation_req
= nullptr;
385 if (samePage(orig_addr
, new_pfi
.getAddr())) {
386 if (useVirtualAddresses
) {
387 // if we trained with virtual addresses,
388 // compute the target PA using the original PA and adding the
389 // prefetch stride (difference between target VA and original VA)
390 target_paddr
= positive_stride
? (pkt
->req
->getPaddr() + stride
) :
391 (pkt
->req
->getPaddr() - stride
);
// Trained with physical addresses: the candidate address is the target.
393 target_paddr
= new_pfi
.getAddr();
395 has_target_pa
= true;
397 // Page crossing reference
399 // ContextID is needed for translation
400 if (!pkt
->req
->hasContextId()) {
403 if (useVirtualAddresses
) {
404 has_target_pa
= false;
405 translation_req
= createPrefetchRequest(new_pfi
.getAddr(), new_pfi
,
407 } else if (pkt
->req
->hasVaddr()) {
408 has_target_pa
= false;
409 // Compute the target VA using req->getVaddr + stride
410 Addr target_vaddr
= positive_stride
?
411 (pkt
->req
->getVaddr() + stride
) :
412 (pkt
->req
->getVaddr() - stride
);
413 translation_req
= createPrefetchRequest(target_vaddr
, new_pfi
,
416 // Using PA for training but the request does not have a VA,
417 // unable to process this page crossing prefetch.
// The redundancy check needs a physical address, so it only applies when
// the target address is already known.
421 if (has_target_pa
&& cacheSnoop
&&
422 (inCache(target_paddr
, new_pfi
.isSecure()) ||
423 inMissQueue(target_paddr
, new_pfi
.isSecure()))) {
425 DPRINTF(HWPrefetch
, "Dropping redundant in "
426 "cache/MSHR prefetch addr:%#x\n", target_paddr
);
430 /* Create the packet and find the spot to insert it */
431 DeferredPacket
dpp(this, new_pfi
, 0, priority
);
// Ready time of the prefetch: now plus latency clock periods.
433 Tick pf_time
= curTick() + clockPeriod() * latency
;
434 dpp
.createPkt(target_paddr
, blkSize
, masterId
, tagPrefetch
, pf_time
);
435 DPRINTF(HWPrefetch
, "Prefetch queued. "
436 "addr:%#x priority: %3d tick:%lld.\n",
437 new_pfi
.getAddr(), priority
, pf_time
);
438 addToQueue(pfq
, dpp
);
440 // Add the translation request and try to resolve it later
441 dpp
.setTranslationRequest(translation_req
);
// The thread context is looked up from the context id of the request.
442 dpp
.tc
= cache
->system
->getThreadContext(translation_req
->contextId());
443 DPRINTF(HWPrefetch
, "Prefetch queued with no translation. "
444 "addr:%#x priority: %3d\n", new_pfi
.getAddr(), priority
);
445 addToQueue(pfqMissingTranslation
, dpp
);
// Inserts dpp into queue at a position chosen by comparing it with the
// existing entries. When the queue already holds queueSize entries, an
// entry is selected for removal first: the search starts at the tail and,
// among entries of equal priority, moves towards the head.
// queue: list receiving the entry (pfq or pfqMissingTranslation at the
//        visible call sites).
// NOTE(review): the dpp parameter line, the iterator decrements, the removal
// itself and the end of the definition are on lines not visible in this
// listing; the comments describe only the visible statements.
450 QueuedPrefetcher::addToQueue(std::list
<DeferredPacket
> &queue
,
453 /* Verify prefetch buffer space for request */
// The size is compared against queueSize for whichever list is passed in.
454 if (queue
.size() == queueSize
) {
456 /* Lowest priority packet */
457 iterator it
= queue
.end();
// end() == begin() would mean the full queue has no entries.
458 panic_if (it
== queue
.begin(),
459 "Prefetch queue is both full and empty!");
461 /* Look for oldest in that level of priority */
462 panic_if (it
== queue
.begin(),
463 "Prefetch queue is full with 1 element!");
466 /* While not at the head of the queue */
467 while (cont
&& prev
!= queue
.begin()) {
469 /* While at the same level of priority */
470 cont
= prev
->priority
== it
->priority
;
475 DPRINTF(HWPrefetch
, "Prefetch queue full, removing lowest priority "
476 "oldest packet, addr: %#x\n",it
->pfInfo
.getAddr());
// An empty queue needs no search for the insertion point.
481 if (queue
.size() == 0) {
482 queue
.emplace_back(dpp
);
484 iterator it
= queue
.end();
487 } while (it
!= queue
.begin() && dpp
> *it
);
488 /* If we reach the head, we have to see if the new element is new head
490 if (it
== queue
.begin() && dpp
<= *it
)
492 queue
.insert(it
, dpp
);