mem-cache: Initialize all members of `QueuedPrefetcher::DeferredPacket`.
[gem5.git] / src / mem / cache / prefetch / queued.cc
1 /*
2 * Copyright (c) 2014-2015 ARM Limited
3 * All rights reserved
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder. You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions are
16 * met: redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer;
18 * redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution;
21 * neither the name of the copyright holders nor the names of its
22 * contributors may be used to endorse or promote products derived from
23 * this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 *
37 * Authors: Mitch Hayenga
38 */
39
40 #include "mem/cache/prefetch/queued.hh"
41
42 #include <cassert>
43
44 #include "arch/generic/tlb.hh"
45 #include "base/logging.hh"
46 #include "base/trace.hh"
47 #include "debug/HWPrefetch.hh"
48 #include "mem/cache/base.hh"
49 #include "mem/request.hh"
50 #include "params/QueuedPrefetcher.hh"
51
52 void
53 QueuedPrefetcher::DeferredPacket::createPkt(Addr paddr, unsigned blk_size,
54 MasterID mid, bool tag_prefetch,
55 Tick t) {
56 /* Create a prefetch memory request */
57 RequestPtr req = std::make_shared<Request>(paddr, blk_size, 0, mid);
58
59 if (pfInfo.isSecure()) {
60 req->setFlags(Request::SECURE);
61 }
62 req->taskId(ContextSwitchTaskId::Prefetcher);
63 pkt = new Packet(req, MemCmd::HardPFReq);
64 pkt->allocate();
65 if (tag_prefetch && pfInfo.hasPC()) {
66 // Tag prefetch packet with accessing pc
67 pkt->req->setPC(pfInfo.getPC());
68 }
69 tick = t;
70 }
71
72 void
73 QueuedPrefetcher::DeferredPacket::startTranslation(BaseTLB *tlb)
74 {
75 assert(translationRequest != nullptr);
76 if (!ongoingTranslation) {
77 ongoingTranslation = true;
78 // Prefetchers only operate in Timing mode
79 tlb->translateTiming(translationRequest, tc, this, BaseTLB::Read);
80 }
81 }
82
// TLB callback invoked when the timing translation started in
// startTranslation() completes. Notifies the owning prefetcher, which
// will either enqueue the prefetch or drop it.
// NOTE(review): the req/tc/mode parameters are unused here (the member
// translationRequest/tc are used instead), and the tc parameter shadows
// the member of the same name — confirm against the BaseTLB::Translation
// interface before renaming.
void
QueuedPrefetcher::DeferredPacket::finish(const Fault &fault,
    const RequestPtr &req, ThreadContext *tc, BaseTLB::Mode mode)
{
    assert(ongoingTranslation);
    ongoingTranslation = false;
    // Any fault other than NoFault means the translation failed.
    bool failed = (fault != NoFault);
    owner->translationComplete(this, failed);
}
92
// Construct the queued prefetcher, copying all tunables from the
// generated parameter struct: queue capacities, issue latency, and the
// squash/filter/snoop/tag/throttle policy knobs. Both deferred-packet
// queues start empty.
QueuedPrefetcher::QueuedPrefetcher(const QueuedPrefetcherParams *p)
    : BasePrefetcher(p), queueSize(p->queue_size),
      missingTranslationQueueSize(
        p->max_prefetch_requests_with_pending_translation),
      latency(p->latency), queueSquash(p->queue_squash),
      queueFilter(p->queue_filter), cacheSnoop(p->cache_snoop),
      tagPrefetch(p->tag_prefetch),
      throttleControlPct(p->throttle_control_percentage)
{
}
103
QueuedPrefetcher::~QueuedPrefetcher()
{
    // Delete the queued prefetch packets. Entries still sitting in
    // pfqMissingTranslation have not had createPkt() called on them
    // (see insert()), so presumably their pkt is not an owned packet —
    // TODO(review): confirm pkt is initialized to nullptr in the header.
    for (DeferredPacket &p : pfq) {
        delete p.pkt;
    }
}
111
112 size_t
113 QueuedPrefetcher::getMaxPermittedPrefetches(size_t total) const
114 {
115 /**
116 * Throttle generated prefetches based in the accuracy of the prefetcher.
117 * Accuracy is computed based in the ratio of useful prefetches with
118 * respect to the number of issued prefetches.
119 *
120 * The throttleControlPct controls how many of the candidate addresses
121 * generated by the prefetcher will be finally turned into prefetch
122 * requests
123 * - If set to 100, all candidates can be discarded (one request
124 * will always be allowed to be generated)
125 * - Setting it to 0 will disable the throttle control, so requests are
126 * created for all candidates
127 * - If set to 60, 40% of candidates will generate a request, and the
128 * remaining 60% will be generated depending on the current accuracy
129 */
130
131 size_t max_pfs = total;
132 if (total > 0 && issuedPrefetches > 0) {
133 size_t throttle_pfs = (total * throttleControlPct) / 100;
134 size_t min_pfs = (total - throttle_pfs) == 0 ?
135 1 : (total - throttle_pfs);
136 max_pfs = min_pfs + (total - min_pfs) *
137 usefulPrefetches / issuedPrefetches;
138 }
139 return max_pfs;
140 }
141
// Entry point called on a demand access: optionally squashes queued
// prefetches to the accessed line, asks the concrete prefetcher for new
// candidate addresses, throttles them, and inserts the survivors into
// the prefetch queues.
void
QueuedPrefetcher::notify(const PacketPtr &pkt, const PrefetchInfo &pfi)
{
    Addr blk_addr = blockAddress(pfi.getAddr());
    bool is_secure = pfi.isSecure();

    // Squash queued prefetches if demand miss to same line
    if (queueSquash) {
        auto itr = pfq.begin();
        while (itr != pfq.end()) {
            if (itr->pfInfo.getAddr() == blk_addr &&
                itr->pfInfo.isSecure() == is_secure) {
                // The queue owns the packet, so free it before erasing.
                delete itr->pkt;
                itr = pfq.erase(itr);
            } else {
                ++itr;
            }
        }
    }

    // Calculate prefetches given this access
    std::vector<AddrPriority> addresses;
    calculatePrefetch(pfi, addresses);

    // Get the maximum number of prefetches that we are allowed to generate
    size_t max_pfs = getMaxPermittedPrefetches(addresses.size());

    // Queue up generated prefetches
    size_t num_pfs = 0;
    for (AddrPriority& addr_prio : addresses) {

        // Block align prefetch address
        addr_prio.first = blockAddress(addr_prio.first);

        if (!samePage(addr_prio.first, pfi.getAddr())) {
            pfSpanPage += 1;
        }

        // Page-crossing candidates can only be handled when a TLB is
        // available to translate the new page's address.
        bool can_cross_page = (tlb != nullptr);
        if (can_cross_page || samePage(addr_prio.first, pfi.getAddr())) {
            PrefetchInfo new_pfi(pfi,addr_prio.first);
            pfIdentified++;
            DPRINTF(HWPrefetch, "Found a pf candidate addr: %#x, "
                    "inserting into prefetch queue.\n", new_pfi.getAddr());
            // Create and insert the request
            insert(pkt, new_pfi, addr_prio.second);
            num_pfs += 1;
            if (num_pfs == max_pfs) {
                break;
            }
        } else {
            DPRINTF(HWPrefetch, "Ignoring page crossing prefetch.\n");
        }
    }
}
197
198 PacketPtr
199 QueuedPrefetcher::getPacket()
200 {
201 DPRINTF(HWPrefetch, "Requesting a prefetch to issue.\n");
202
203 if (pfq.empty()) {
204 // If the queue is empty, attempt first to fill it with requests
205 // from the queue of missing translations
206 processMissingTranslations(queueSize);
207 }
208
209 if (pfq.empty()) {
210 DPRINTF(HWPrefetch, "No hardware prefetches available.\n");
211 return nullptr;
212 }
213
214 PacketPtr pkt = pfq.front().pkt;
215 pfq.pop_front();
216
217 pfIssued++;
218 issuedPrefetches += 1;
219 assert(pkt != nullptr);
220 DPRINTF(HWPrefetch, "Generating prefetch for %#x.\n", pkt->getAddr());
221
222 processMissingTranslations(queueSize - pfq.size());
223 return pkt;
224 }
225
// Register this prefetcher's statistics (names and descriptions) on top
// of the ones provided by BasePrefetcher.
void
QueuedPrefetcher::regStats()
{
    BasePrefetcher::regStats();

    pfIdentified
        .name(name() + ".pfIdentified")
        .desc("number of prefetch candidates identified");

    pfBufferHit
        .name(name() + ".pfBufferHit")
        .desc("number of redundant prefetches already in prefetch queue");

    pfInCache
        .name(name() + ".pfInCache")
        .desc("number of redundant prefetches already in cache/mshr dropped");

    pfRemovedFull
        .name(name() + ".pfRemovedFull")
        .desc("number of prefetches dropped due to prefetch queue size");

    pfSpanPage
        .name(name() + ".pfSpanPage")
        .desc("number of prefetches that crossed the page");
}
251
252
253 void
254 QueuedPrefetcher::processMissingTranslations(unsigned max)
255 {
256 unsigned count = 0;
257 iterator it = pfqMissingTranslation.begin();
258 while (it != pfqMissingTranslation.end() && count < max) {
259 DeferredPacket &dp = *it;
260 // Increase the iterator first because dp.startTranslation can end up
261 // calling finishTranslation, which will erase "it"
262 it++;
263 dp.startTranslation(tlb);
264 count += 1;
265 }
266 }
267
268 void
269 QueuedPrefetcher::translationComplete(DeferredPacket *dp, bool failed)
270 {
271 auto it = pfqMissingTranslation.begin();
272 while (it != pfqMissingTranslation.end()) {
273 if (&(*it) == dp) {
274 break;
275 }
276 it++;
277 }
278 assert(it != pfqMissingTranslation.end());
279 if (!failed) {
280 DPRINTF(HWPrefetch, "%s Translation of vaddr %#x succeeded: "
281 "paddr %#x \n", tlb->name(),
282 it->translationRequest->getVaddr(),
283 it->translationRequest->getPaddr());
284 Addr target_paddr = it->translationRequest->getPaddr();
285 // check if this prefetch is already redundant
286 if (cacheSnoop && (inCache(target_paddr, it->pfInfo.isSecure()) ||
287 inMissQueue(target_paddr, it->pfInfo.isSecure()))) {
288 pfInCache++;
289 DPRINTF(HWPrefetch, "Dropping redundant in "
290 "cache/MSHR prefetch addr:%#x\n", target_paddr);
291 } else {
292 Tick pf_time = curTick() + clockPeriod() * latency;
293 it->createPkt(it->translationRequest->getPaddr(), blkSize,
294 masterId, tagPrefetch, pf_time);
295 addToQueue(pfq, *it);
296 }
297 } else {
298 DPRINTF(HWPrefetch, "%s Translation of vaddr %#x failed, dropping "
299 "prefetch request %#x \n", tlb->name(),
300 it->translationRequest->getVaddr());
301 }
302 pfqMissingTranslation.erase(it);
303 }
304
305 bool
306 QueuedPrefetcher::alreadyInQueue(std::list<DeferredPacket> &queue,
307 const PrefetchInfo &pfi, int32_t priority)
308 {
309 bool found = false;
310 iterator it;
311 for (it = queue.begin(); it != queue.end() && !found; it++) {
312 found = it->pfInfo.sameAddr(pfi);
313 }
314
315 /* If the address is already in the queue, update priority and leave */
316 if (it != queue.end()) {
317 pfBufferHit++;
318 if (it->priority < priority) {
319 /* Update priority value and position in the queue */
320 it->priority = priority;
321 iterator prev = it;
322 while (prev != queue.begin()) {
323 prev--;
324 /* If the packet has higher priority, swap */
325 if (*it > *prev) {
326 std::swap(*it, *prev);
327 it = prev;
328 }
329 }
330 DPRINTF(HWPrefetch, "Prefetch addr already in "
331 "prefetch queue, priority updated\n");
332 } else {
333 DPRINTF(HWPrefetch, "Prefetch addr already in "
334 "prefetch queue\n");
335 }
336 }
337 return found;
338 }
339
340 RequestPtr
341 QueuedPrefetcher::createPrefetchRequest(Addr addr, PrefetchInfo const &pfi,
342 PacketPtr pkt)
343 {
344 RequestPtr translation_req = std::make_shared<Request>(pkt->req->getAsid(),
345 addr, blkSize, pkt->req->getFlags(), masterId, pfi.getPC(),
346 pkt->req->contextId());
347 translation_req->setFlags(Request::PREFETCH);
348 return translation_req;
349 }
350
// Insert a new prefetch candidate into the appropriate queue: directly
// into pfq when its physical address can be computed here, or into
// pfqMissingTranslation when an address translation is required first.
// Candidates already queued (when queueFilter is set) or already present
// in the cache/MSHRs (when cacheSnoop is set) are dropped.
void
QueuedPrefetcher::insert(const PacketPtr &pkt, PrefetchInfo &new_pfi,
                         int32_t priority)
{
    if (queueFilter) {
        // alreadyInQueue also bumps the queued entry's priority if the
        // new candidate's is higher.
        if (alreadyInQueue(pfq, new_pfi, priority)) {
            return;
        }
        if (alreadyInQueue(pfqMissingTranslation, new_pfi, priority)) {
            return;
        }
    }

    /*
     * Physical address computation
     * if the prefetch is within the same page
     *   using VA: add the computed stride to the original PA
     *   using PA: no actions needed
     * if we are page crossing
     *   using VA: Create a translation request and enqueue the
     *     corresponding deferred packet to the queue of pending
     *     translations
     *   using PA: use the provided VA to obtain the target VA, then
     *     attempt to translate the resulting address
     */

    Addr orig_addr = useVirtualAddresses ?
        pkt->req->getVaddr() : pkt->req->getPaddr();
    // Stride is kept as a magnitude plus direction to avoid unsigned
    // underflow when the target lies below the original address.
    bool positive_stride = new_pfi.getAddr() >= orig_addr;
    Addr stride = positive_stride ?
        (new_pfi.getAddr() - orig_addr) : (orig_addr - new_pfi.getAddr());

    Addr target_paddr;
    bool has_target_pa = false;
    RequestPtr translation_req = nullptr;
    if (samePage(orig_addr, new_pfi.getAddr())) {
        if (useVirtualAddresses) {
            // if we trained with virtual addresses,
            // compute the target PA using the original PA and adding the
            // prefetch stride (difference between target VA and original VA)
            target_paddr = positive_stride ? (pkt->req->getPaddr() + stride) :
                (pkt->req->getPaddr() - stride);
        } else {
            target_paddr = new_pfi.getAddr();
        }
        has_target_pa = true;
    } else {
        // Page crossing reference

        // ContextID is needed for translation
        if (!pkt->req->hasContextId()) {
            return;
        }
        if (useVirtualAddresses) {
            has_target_pa = false;
            translation_req = createPrefetchRequest(new_pfi.getAddr(), new_pfi,
                                                    pkt);
        } else if (pkt->req->hasVaddr()) {
            has_target_pa = false;
            // Compute the target VA using req->getVaddr + stride
            Addr target_vaddr = positive_stride ?
                (pkt->req->getVaddr() + stride) :
                (pkt->req->getVaddr() - stride);
            translation_req = createPrefetchRequest(target_vaddr, new_pfi,
                                                    pkt);
        } else {
            // Using PA for training but the request does not have a VA,
            // unable to process this page crossing prefetch.
            return;
        }
    }
    // Drop prefetches whose target line already resides in the cache or
    // is being fetched by an outstanding miss.
    if (has_target_pa && cacheSnoop &&
            (inCache(target_paddr, new_pfi.isSecure()) ||
            inMissQueue(target_paddr, new_pfi.isSecure()))) {
        pfInCache++;
        DPRINTF(HWPrefetch, "Dropping redundant in "
                "cache/MSHR prefetch addr:%#x\n", target_paddr);
        return;
    }

    /* Create the packet and find the spot to insert it */
    DeferredPacket dpp(this, new_pfi, 0, priority);
    if (has_target_pa) {
        Tick pf_time = curTick() + clockPeriod() * latency;
        dpp.createPkt(target_paddr, blkSize, masterId, tagPrefetch, pf_time);
        DPRINTF(HWPrefetch, "Prefetch queued. "
                "addr:%#x priority: %3d tick:%lld.\n",
                new_pfi.getAddr(), priority, pf_time);
        addToQueue(pfq, dpp);
    } else {
        // Add the translation request and try to resolve it later
        dpp.setTranslationRequest(translation_req);
        dpp.tc = cache->system->getThreadContext(translation_req->contextId());
        DPRINTF(HWPrefetch, "Prefetch queued with no translation. "
                "addr:%#x priority: %3d\n", new_pfi.getAddr(), priority);
        addToQueue(pfqMissingTranslation, dpp);
    }
}
448
// Insert `dpp` into `queue`, which is kept sorted by descending
// priority (ties broken by insertion order / readiness). If the queue is
// full, the oldest entry at the lowest priority level is evicted first.
// NOTE(review): the second panic_if fires whenever the queue capacity is
// one element — confirm queue_size >= 2 is enforced elsewhere.
void
QueuedPrefetcher::addToQueue(std::list<DeferredPacket> &queue,
                             DeferredPacket &dpp)
{
    /* Verify prefetch buffer space for request */
    if (queue.size() == queueSize) {
        pfRemovedFull++;
        /* Lowest priority packet */
        iterator it = queue.end();
        panic_if (it == queue.begin(),
            "Prefetch queue is both full and empty!");
        --it;
        /* Look for oldest in that level of priority */
        panic_if (it == queue.begin(),
            "Prefetch queue is full with 1 element!");
        iterator prev = it;
        bool cont = true;
        /* While not at the head of the queue */
        while (cont && prev != queue.begin()) {
            prev--;
            /* While at the same level of priority */
            cont = prev->priority == it->priority;
            if (cont)
                /* update pointer */
                it = prev;
        }
        DPRINTF(HWPrefetch, "Prefetch queue full, removing lowest priority "
                "oldest packet, addr: %#x\n",it->pfInfo.getAddr());
        // The queue owns the packet, so free it before erasing the entry.
        delete it->pkt;
        queue.erase(it);
    }

    if (queue.size() == 0) {
        queue.emplace_back(dpp);
    } else {
        // Walk backwards past every entry the new packet outranks, then
        // insert before the first entry it does not outrank.
        iterator it = queue.end();
        do {
            --it;
        } while (it != queue.begin() && dpp > *it);
        /* If we reach the head, we have to see if the new element is new head
         * or not */
        if (it == queue.begin() && dpp <= *it)
            it++;
        queue.insert(it, dpp);
    }
}