src/mem/cache/prefetch/queued.cc

   1 /*
   2  * Copyright (c) 2014-2015 ARM Limited
   3  * All rights reserved
   4  *
   5  * The license below extends only to copyright in the software and shall
   6  * not be construed as granting a license to any other intellectual
   7  * property including but not limited to intellectual property relating
   8  * to a hardware implementation of the functionality of the software
   9  * licensed hereunder.  You may use the software subject to the license
  10  * terms below provided that you ensure that this notice is replicated
  11  * unmodified and in its entirety in all distributions of the software,
  12  * modified or unmodified, in source code or in binary form.
  13  *
  14  * Redistribution and use in source and binary forms, with or without
  15  * modification, are permitted provided that the following conditions are
  16  * met: redistributions of source code must retain the above copyright
  17  * notice, this list of conditions and the following disclaimer;
  18  * redistributions in binary form must reproduce the above copyright
  19  * notice, this list of conditions and the following disclaimer in the
  20  * documentation and/or other materials provided with the distribution;
  21  * neither the name of the copyright holders nor the names of its
  22  * contributors may be used to endorse or promote products derived from
  23  * this software without specific prior written permission.
  24  *
  25  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  26  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  27  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  28  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  29  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  30  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  31  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  32  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  33  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  34  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  35  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  36  *
  37  * Authors: Mitch Hayenga
  38  */
  39
  40 #include "mem/cache/prefetch/queued.hh"
  41
  42 #include <cassert>
  43
  44 #include "arch/generic/tlb.hh"
  45 #include "base/logging.hh"
  46 #include "base/trace.hh"
  47 #include "debug/HWPrefetch.hh"
  48 #include "mem/cache/base.hh"
  49 #include "mem/request.hh"
  50 #include "params/QueuedPrefetcher.hh"
  51
  52 void
  53 QueuedPrefetcher::DeferredPacket::createPkt(Addr paddr, unsigned blk_size,
  54                                             MasterID mid, bool tag_prefetch,
  55                                             Tick t) {
  56     /* Create a prefetch memory request */
  57     RequestPtr req = std::make_shared<Request>(paddr, blk_size, 0, mid);
  58
  59     if (pfInfo.isSecure()) {
  60         req->setFlags(Request::SECURE);
  61     }
  62     req->taskId(ContextSwitchTaskId::Prefetcher);
  63     pkt = new Packet(req, MemCmd::HardPFReq);
  64     pkt->allocate();
  65     if (tag_prefetch && pfInfo.hasPC()) {
  66         // Tag prefetch packet with  accessing pc
  67         pkt->req->setPC(pfInfo.getPC());
  68     }
  69     tick = t;
  70 }
  71
  72 void
  73 QueuedPrefetcher::DeferredPacket::startTranslation(BaseTLB *tlb)
  74 {
  75     assert(translationRequest != nullptr);
  76     if (!ongoingTranslation) {
  77         ongoingTranslation = true;
  78         // Prefetchers only operate in Timing mode
  79         tlb->translateTiming(translationRequest, tc, this, BaseTLB::Read);
  80     }
  81 }
  82
  83 void
  84 QueuedPrefetcher::DeferredPacket::finish(const Fault &fault,
  85     const RequestPtr &req, ThreadContext *tc, BaseTLB::Mode mode)
  86 {
  87     assert(ongoingTranslation);
  88     ongoingTranslation = false;
  89     bool failed = (fault != NoFault);
  90     owner->translationComplete(this, failed);
  91 }
  92
  93 QueuedPrefetcher::QueuedPrefetcher(const QueuedPrefetcherParams *p)
  94     : BasePrefetcher(p), queueSize(p->queue_size),
  95       missingTranslationQueueSize(
  96         p->max_prefetch_requests_with_pending_translation),
  97       latency(p->latency), queueSquash(p->queue_squash),
  98       queueFilter(p->queue_filter), cacheSnoop(p->cache_snoop),
  99       tagPrefetch(p->tag_prefetch),
 100       throttleControlPct(p->throttle_control_percentage)
 101 {
 102 }
 103
 104 QueuedPrefetcher::~QueuedPrefetcher()
 105 {
 106     // Delete the queued prefetch packets
 107     for (DeferredPacket &p : pfq) {
 108         delete p.pkt;
 109     }
 110 }
 111
 112 size_t
 113 QueuedPrefetcher::getMaxPermittedPrefetches(size_t total) const
 114 {
 115     /**
 116      * Throttle generated prefetches based in the accuracy of the prefetcher.
 117      * Accuracy is computed based in the ratio of useful prefetches with
 118      * respect to the number of issued prefetches.
 119      *
 120      * The throttleControlPct controls how many of the candidate addresses
 121      * generated by the prefetcher will be finally turned into prefetch
 122      * requests
 123      * - If set to 100, all candidates can be discarded (one request
 124      *   will always be allowed to be generated)
 125      * - Setting it to 0 will disable the throttle control, so requests are
 126      *   created for all candidates
 127      * - If set to 60, 40% of candidates will generate a request, and the
 128      *   remaining 60% will be generated depending on the current accuracy
 129      */
 130
 131     size_t max_pfs = total;
 132     if (total > 0 && issuedPrefetches > 0) {
 133         size_t throttle_pfs = (total * throttleControlPct) / 100;
 134         size_t min_pfs = (total - throttle_pfs) == 0 ?
 135             1 : (total - throttle_pfs);
 136         max_pfs = min_pfs + (total - min_pfs) *
 137             usefulPrefetches / issuedPrefetches;
 138     }
 139     return max_pfs;
 140 }
 141
 142 void
 143 QueuedPrefetcher::notify(const PacketPtr &pkt, const PrefetchInfo &pfi)
 144 {
 145     Addr blk_addr = blockAddress(pfi.getAddr());
 146     bool is_secure = pfi.isSecure();
 147
 148     // Squash queued prefetches if demand miss to same line
 149     if (queueSquash) {
 150         auto itr = pfq.begin();
 151         while (itr != pfq.end()) {
 152             if (itr->pfInfo.getAddr() == blk_addr &&
 153                 itr->pfInfo.isSecure() == is_secure) {
 154                 delete itr->pkt;
 155                 itr = pfq.erase(itr);
 156             } else {
 157                 ++itr;
 158             }
 159         }
 160     }
 161
 162     // Calculate prefetches given this access
 163     std::vector<AddrPriority> addresses;
 164     calculatePrefetch(pfi, addresses);
 165
 166     // Get the maximu number of prefetches that we are allowed to generate
 167     size_t max_pfs = getMaxPermittedPrefetches(addresses.size());
 168
 169     // Queue up generated prefetches
 170     size_t num_pfs = 0;
 171     for (AddrPriority& addr_prio : addresses) {
 172
 173         // Block align prefetch address
 174         addr_prio.first = blockAddress(addr_prio.first);
 175
 176         if (!samePage(addr_prio.first, pfi.getAddr())) {
 177             pfSpanPage += 1;
 178         }
 179
 180         bool can_cross_page = (tlb != nullptr);
 181         if (can_cross_page || samePage(addr_prio.first, pfi.getAddr())) {
 182             PrefetchInfo new_pfi(pfi,addr_prio.first);
 183             pfIdentified++;
 184             DPRINTF(HWPrefetch, "Found a pf candidate addr: %#x, "
 185                     "inserting into prefetch queue.\n", new_pfi.getAddr());
 186             // Create and insert the request
 187             insert(pkt, new_pfi, addr_prio.second);
 188             num_pfs += 1;
 189             if (num_pfs == max_pfs) {
 190                 break;
 191             }
 192         } else {
 193             DPRINTF(HWPrefetch, "Ignoring page crossing prefetch.\n");
 194         }
 195     }
 196 }
 197
 198 PacketPtr
 199 QueuedPrefetcher::getPacket()
 200 {
 201     DPRINTF(HWPrefetch, "Requesting a prefetch to issue.\n");
 202
 203     if (pfq.empty()) {
 204         // If the queue is empty, attempt first to fill it with requests
 205         // from the queue of missing translations
 206         processMissingTranslations(queueSize);
 207     }
 208
 209     if (pfq.empty()) {
 210         DPRINTF(HWPrefetch, "No hardware prefetches available.\n");
 211         return nullptr;
 212     }
 213
 214     PacketPtr pkt = pfq.front().pkt;
 215     pfq.pop_front();
 216
 217     pfIssued++;
 218     issuedPrefetches += 1;
 219     assert(pkt != nullptr);
 220     DPRINTF(HWPrefetch, "Generating prefetch for %#x.\n", pkt->getAddr());
 221
 222     processMissingTranslations(queueSize - pfq.size());
 223     return pkt;
 224 }
 225
 226 void
 227 QueuedPrefetcher::regStats()
 228 {
 229     BasePrefetcher::regStats();
 230
 231     pfIdentified
 232         .name(name() + ".pfIdentified")
 233         .desc("number of prefetch candidates identified");
 234
 235     pfBufferHit
 236         .name(name() + ".pfBufferHit")
 237         .desc("number of redundant prefetches already in prefetch queue");
 238
 239     pfInCache
 240         .name(name() + ".pfInCache")
 241         .desc("number of redundant prefetches already in cache/mshr dropped");
 242
 243     pfRemovedFull
 244         .name(name() + ".pfRemovedFull")
 245         .desc("number of prefetches dropped due to prefetch queue size");
 246
 247     pfSpanPage
 248         .name(name() + ".pfSpanPage")
 249         .desc("number of prefetches that crossed the page");
 250 }
 251
 252
 253 void
 254 QueuedPrefetcher::processMissingTranslations(unsigned max)
 255 {
 256     unsigned count = 0;
 257     iterator it = pfqMissingTranslation.begin();
 258     while (it != pfqMissingTranslation.end() && count < max) {
 259         DeferredPacket &dp = *it;
 260         // Increase the iterator first because dp.startTranslation can end up
 261         // calling finishTranslation, which will erase "it"
 262         it++;
 263         dp.startTranslation(tlb);
 264         count += 1;
 265     }
 266 }
 267
 268 void
 269 QueuedPrefetcher::translationComplete(DeferredPacket *dp, bool failed)
 270 {
 271     auto it = pfqMissingTranslation.begin();
 272     while (it != pfqMissingTranslation.end()) {
 273         if (&(*it) == dp) {
 274             break;
 275         }
 276         it++;
 277     }
 278     assert(it != pfqMissingTranslation.end());
 279     if (!failed) {
 280         DPRINTF(HWPrefetch, "%s Translation of vaddr %#x succeeded: "
 281                 "paddr %#x \n", tlb->name(),
 282                 it->translationRequest->getVaddr(),
 283                 it->translationRequest->getPaddr());
 284         Addr target_paddr = it->translationRequest->getPaddr();
 285         // check if this prefetch is already redundant
 286         if (cacheSnoop && (inCache(target_paddr, it->pfInfo.isSecure()) ||
 287                     inMissQueue(target_paddr, it->pfInfo.isSecure()))) {
 288             pfInCache++;
 289             DPRINTF(HWPrefetch, "Dropping redundant in "
 290                     "cache/MSHR prefetch addr:%#x\n", target_paddr);
 291         } else {
 292             Tick pf_time = curTick() + clockPeriod() * latency;
 293             it->createPkt(it->translationRequest->getPaddr(), blkSize,
 294                     masterId, tagPrefetch, pf_time);
 295             addToQueue(pfq, *it);
 296         }
 297     } else {
 298         DPRINTF(HWPrefetch, "%s Translation of vaddr %#x failed, dropping "
 299                 "prefetch request %#x \n", tlb->name(),
 300                 it->translationRequest->getVaddr());
 301     }
 302     pfqMissingTranslation.erase(it);
 303 }
 304
 305 bool
 306 QueuedPrefetcher::alreadyInQueue(std::list<DeferredPacket> &queue,
 307                                  const PrefetchInfo &pfi, int32_t priority)
 308 {
 309     bool found = false;
 310     iterator it;
 311     for (it = queue.begin(); it != queue.end() && !found; it++) {
 312         found = it->pfInfo.sameAddr(pfi);
 313     }
 314
 315     /* If the address is already in the queue, update priority and leave */
 316     if (it != queue.end()) {
 317         pfBufferHit++;
 318         if (it->priority < priority) {
 319             /* Update priority value and position in the queue */
 320             it->priority = priority;
 321             iterator prev = it;
 322             while (prev != queue.begin()) {
 323                 prev--;
 324                 /* If the packet has higher priority, swap */
 325                 if (*it > *prev) {
 326                     std::swap(*it, *prev);
 327                     it = prev;
 328                 }
 329             }
 330             DPRINTF(HWPrefetch, "Prefetch addr already in "
 331                 "prefetch queue, priority updated\n");
 332         } else {
 333             DPRINTF(HWPrefetch, "Prefetch addr already in "
 334                 "prefetch queue\n");
 335         }
 336     }
 337     return found;
 338 }
 339
 340 RequestPtr
 341 QueuedPrefetcher::createPrefetchRequest(Addr addr, PrefetchInfo const &pfi,
 342                                         PacketPtr pkt)
 343 {
 344     RequestPtr translation_req = std::make_shared<Request>(pkt->req->getAsid(),
 345             addr, blkSize, pkt->req->getFlags(), masterId, pfi.getPC(),
 346             pkt->req->contextId());
 347     translation_req->setFlags(Request::PREFETCH);
 348     return translation_req;
 349 }
 350
 351 void
 352 QueuedPrefetcher::insert(const PacketPtr &pkt, PrefetchInfo &new_pfi,
 353                          int32_t priority)
 354 {
 355     if (queueFilter) {
 356         if (alreadyInQueue(pfq, new_pfi, priority)) {
 357             return;
 358         }
 359         if (alreadyInQueue(pfqMissingTranslation, new_pfi, priority)) {
 360             return;
 361         }
 362     }
 363
 364     /*
 365      * Physical address computation
 366      * if the prefetch is within the same page
 367      *   using VA: add the computed stride to the original PA
 368      *   using PA: no actions needed
 369      * if we are page crossing
 370      *   using VA: Create a translaion request and enqueue the corresponding
 371      *       deferred packet to the queue of pending translations
 372      *   using PA: use the provided VA to obtain the target VA, then attempt to
 373      *     translate the resulting address
 374      */
 375
 376     Addr orig_addr = useVirtualAddresses ?
 377         pkt->req->getVaddr() : pkt->req->getPaddr();
 378     bool positive_stride = new_pfi.getAddr() >= orig_addr;
 379     Addr stride = positive_stride ?
 380         (new_pfi.getAddr() - orig_addr) : (orig_addr - new_pfi.getAddr());
 381
 382     Addr target_paddr;
 383     bool has_target_pa = false;
 384     RequestPtr translation_req = nullptr;
 385     if (samePage(orig_addr, new_pfi.getAddr())) {
 386         if (useVirtualAddresses) {
 387             // if we trained with virtual addresses,
 388             // compute the target PA using the original PA and adding the
 389             // prefetch stride (difference between target VA and original VA)
 390             target_paddr = positive_stride ? (pkt->req->getPaddr() + stride) :
 391                 (pkt->req->getPaddr() - stride);
 392         } else {
 393             target_paddr = new_pfi.getAddr();
 394         }
 395         has_target_pa = true;
 396     } else {
 397         // Page crossing reference
 398
 399         // ContextID is needed for translation
 400         if (!pkt->req->hasContextId()) {
 401             return;
 402         }
 403         if (useVirtualAddresses) {
 404             has_target_pa = false;
 405             translation_req = createPrefetchRequest(new_pfi.getAddr(), new_pfi,
 406                                                     pkt);
 407         } else if (pkt->req->hasVaddr()) {
 408             has_target_pa = false;
 409             // Compute the target VA using req->getVaddr + stride
 410             Addr target_vaddr = positive_stride ?
 411                 (pkt->req->getVaddr() + stride) :
 412                 (pkt->req->getVaddr() - stride);
 413             translation_req = createPrefetchRequest(target_vaddr, new_pfi,
 414                                                     pkt);
 415         } else {
 416             // Using PA for training but the request does not have a VA,
 417             // unable to process this page crossing prefetch.
 418             return;
 419         }
 420     }
 421     if (has_target_pa && cacheSnoop &&
 422             (inCache(target_paddr, new_pfi.isSecure()) ||
 423             inMissQueue(target_paddr, new_pfi.isSecure()))) {
 424         pfInCache++;
 425         DPRINTF(HWPrefetch, "Dropping redundant in "
 426                 "cache/MSHR prefetch addr:%#x\n", target_paddr);
 427         return;
 428     }
 429
 430     /* Create the packet and find the spot to insert it */
 431     DeferredPacket dpp(this, new_pfi, 0, priority);
 432     if (has_target_pa) {
 433         Tick pf_time = curTick() + clockPeriod() * latency;
 434         dpp.createPkt(target_paddr, blkSize, masterId, tagPrefetch, pf_time);
 435         DPRINTF(HWPrefetch, "Prefetch queued. "
 436                 "addr:%#x priority: %3d tick:%lld.\n",
 437                 new_pfi.getAddr(), priority, pf_time);
 438         addToQueue(pfq, dpp);
 439     } else {
 440         // Add the translation request and try to resolve it later
 441         dpp.setTranslationRequest(translation_req);
 442         dpp.tc = cache->system->getThreadContext(translation_req->contextId());
 443         DPRINTF(HWPrefetch, "Prefetch queued with no translation. "
 444                 "addr:%#x priority: %3d\n", new_pfi.getAddr(), priority);
 445         addToQueue(pfqMissingTranslation, dpp);
 446     }
 447 }
 448
 449 void
 450 QueuedPrefetcher::addToQueue(std::list<DeferredPacket> &queue,
 451                              DeferredPacket &dpp)
 452 {
 453     /* Verify prefetch buffer space for request */
 454     if (queue.size() == queueSize) {
 455         pfRemovedFull++;
 456         /* Lowest priority packet */
 457         iterator it = queue.end();
 458         panic_if (it == queue.begin(),
 459             "Prefetch queue is both full and empty!");
 460         --it;
 461         /* Look for oldest in that level of priority */
 462         panic_if (it == queue.begin(),
 463             "Prefetch queue is full with 1 element!");
 464         iterator prev = it;
 465         bool cont = true;
 466         /* While not at the head of the queue */
 467         while (cont && prev != queue.begin()) {
 468             prev--;
 469             /* While at the same level of priority */
 470             cont = prev->priority == it->priority;
 471             if (cont)
 472                 /* update pointer */
 473                 it = prev;
 474         }
 475         DPRINTF(HWPrefetch, "Prefetch queue full, removing lowest priority "
 476                             "oldest packet, addr: %#x\n",it->pfInfo.getAddr());
 477         delete it->pkt;
 478         queue.erase(it);
 479     }
 480
 481     if (queue.size() == 0) {
 482         queue.emplace_back(dpp);
 483     } else {
 484         iterator it = queue.end();
 485         do {
 486             --it;
 487         } while (it != queue.begin() && dpp > *it);
 488         /* If we reach the head, we have to see if the new element is new head
 489          * or not */
 490         if (it == queue.begin() && dpp <= *it)
 491             it++;
 492         queue.insert(it, dpp);
 493     }
 494 }