Move a couple of DPRINTFs from Cache to CachePort.
[gem5.git] src/mem/cache/cache_impl.hh
1 /*
2 * Copyright (c) 2002-2005 The Regents of The University of Michigan
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 *
28 * Authors: Erik Hallnor
29 * Dave Greene
30 * Nathan Binkert
31 * Steve Reinhardt
32 * Ron Dreslinski
33 */
34
35 /**
36 * @file
37 * Cache definitions.
38 */
39
40 #include "sim/host.hh"
41 #include "base/misc.hh"
42
43 #include "mem/cache/cache.hh"
44 #include "mem/cache/cache_blk.hh"
45 #include "mem/cache/miss/mshr.hh"
46 #include "mem/cache/prefetch/base_prefetcher.hh"
47
48 #include "sim/sim_exit.hh" // for SimExitEvent
49
50
51 template<class TagStore>
52 Cache<TagStore>::Cache(const std::string &_name,
53 Cache<TagStore>::Params &params)
54 : BaseCache(_name, params.baseParams),
55 prefetchAccess(params.prefetchAccess),
56 tags(params.tags),
57 prefetcher(params.prefetcher),
58 doFastWrites(params.doFastWrites),
59 prefetchMiss(params.prefetchMiss)
60 {
61 tempBlock = new BlkType();
62 tempBlock->data = new uint8_t[blkSize];
63
64 cpuSidePort = new CpuSidePort(_name + "-cpu_side_port", this);
65 memSidePort = new MemSidePort(_name + "-mem_side_port", this);
66 cpuSidePort->setOtherPort(memSidePort);
67 memSidePort->setOtherPort(cpuSidePort);
68
69 tags->setCache(this);
70 prefetcher->setCache(this);
71 }
72
73 template<class TagStore>
74 void
75 Cache<TagStore>::regStats()
76 {
77 BaseCache::regStats();
78 tags->regStats(name());
79 prefetcher->regStats(name());
80 }
81
82 template<class TagStore>
83 Port *
84 Cache<TagStore>::getPort(const std::string &if_name, int idx)
85 {
86 if (if_name == "" || if_name == "cpu_side") {
87 return cpuSidePort;
88 } else if (if_name == "mem_side") {
89 return memSidePort;
90 } else if (if_name == "functional") {
91 return new CpuSidePort(name() + "-cpu_side_funcport", this);
92 } else {
93 panic("Port name %s unrecognized\n", if_name);
94 }
95 }
96
97 template<class TagStore>
98 void
99 Cache<TagStore>::deletePortRefs(Port *p)
100 {
101 if (cpuSidePort == p || memSidePort == p)
102 panic("Can only delete functional ports\n");
103
104 delete p;
105 }
106
107
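// Perform the atomic swap (or conditional swap) for a SwapReq: the old
// block contents are copied into the packet and, unless the condition
// check fails, the packet's value is written into the block.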
108 template<class TagStore>
109 void
110 Cache<TagStore>::cmpAndSwap(BlkType *blk, PacketPtr pkt)
111 {
112 uint64_t overwrite_val;
113 bool overwrite_mem;
114 uint64_t condition_val64;
115 uint32_t condition_val32;
116
117 int offset = tags->extractBlkOffset(pkt->getAddr());
118 uint8_t *blk_data = blk->data + offset;
119
120 assert(sizeof(uint64_t) >= pkt->getSize());
121
122 overwrite_mem = true;
123 // keep a copy of our possible write value, and copy what is at the
124 // memory address into the packet
125 pkt->writeData((uint8_t *)&overwrite_val);
126 pkt->setData(blk_data);
127
128 if (pkt->req->isCondSwap()) {
129 if (pkt->getSize() == sizeof(uint64_t)) {
130 condition_val64 = pkt->req->getExtraData();
131 overwrite_mem = !std::memcmp(&condition_val64, blk_data,
132 sizeof(uint64_t));
133 } else if (pkt->getSize() == sizeof(uint32_t)) {
134 condition_val32 = (uint32_t)pkt->req->getExtraData();
135 overwrite_mem = !std::memcmp(&condition_val32, blk_data,
136 sizeof(uint32_t));
137 } else
138 panic("Invalid size for conditional read/write\n");
139 }
140
141 if (overwrite_mem)
142 std::memcpy(blk_data, &overwrite_val, pkt->getSize());
143 }
144
145
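// Satisfy a CPU-side request directly from the given valid block:
// swaps go through cmpAndSwap(), writes mark the block dirty, and reads
// copy data out of the block (tracking load-locked where needed).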
146 template<class TagStore>
147 void
148 Cache<TagStore>::satisfyCpuSideRequest(PacketPtr pkt, BlkType *blk)
149 {
150 assert(blk);
151 assert(pkt->needsExclusive() ? blk->isWritable() : blk->isValid());
152 assert(pkt->getOffset(blkSize) + pkt->getSize() <= blkSize);
153
154 // Check RMW operations first since both isRead() and
155 // isWrite() will be true for them
156 if (pkt->cmd == MemCmd::SwapReq) {
157 cmpAndSwap(blk, pkt);
158 } else if (pkt->isWrite()) {
159 if (blk->checkWrite(pkt)) {
160 blk->status |= BlkDirty;
161 pkt->writeDataToBlock(blk->data, blkSize);
162 }
163 } else if (pkt->isRead()) {
164 if (pkt->isLocked()) {
165 blk->trackLoadLocked(pkt);
166 }
167 pkt->setDataFromBlock(blk->data, blkSize);
168 } else {
169 // Not a read or write... must be an upgrade. it's OK
170 // to just ack those as long as we have an exclusive
171 // copy at this level.
172 assert(pkt->cmd == MemCmd::UpgradeReq);
173 }
174 }
175
176
177 /////////////////////////////////////////////////////
178 //
179 // MSHR helper functions
180 //
181 /////////////////////////////////////////////////////
182
183
184 template<class TagStore>
185 void
186 Cache<TagStore>::markInService(MSHR *mshr)
187 {
188 markInServiceInternal(mshr);
189 #if 0
190 if (mshr->originalCmd == MemCmd::HardPFReq) {
191 DPRINTF(HWPrefetch, "%s:Marking a HW_PF in service\n",
192 name());
193 //Also clear pending if need be
194 if (!prefetcher->havePending())
195 {
196 deassertMemSideBusRequest(Request_PF);
197 }
198 }
199 #endif
200 }
201
202
203 template<class TagStore>
204 void
205 Cache<TagStore>::squash(int threadNum)
206 {
207 bool unblock = false;
208 BlockedCause cause = NUM_BLOCKED_CAUSES;
209
210 if (noTargetMSHR && noTargetMSHR->threadNum == threadNum) {
211 noTargetMSHR = NULL;
212 unblock = true;
213 cause = Blocked_NoTargets;
214 }
215 if (mshrQueue.isFull()) {
216 unblock = true;
217 cause = Blocked_NoMSHRs;
218 }
219 mshrQueue.squash(threadNum);
220 if (unblock && !mshrQueue.isFull()) {
221 clearBlocked(cause);
222 }
223 }
224
225 /////////////////////////////////////////////////////
226 //
227 // Access path: requests coming in from the CPU side
228 //
229 /////////////////////////////////////////////////////
230
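// Look the request up in the tags and satisfy it in place if we hit
// with sufficient permissions.  Returns true if satisfied; the matching
// block (or NULL) and the lookup latency come back through the
// reference arguments.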
231 template<class TagStore>
232 bool
233 Cache<TagStore>::access(PacketPtr pkt, BlkType *&blk, int &lat)
234 {
235 if (pkt->req->isUncacheable()) {
236 blk = NULL;
237 lat = hitLatency;
238 return false;
239 }
240
241 bool satisfied = false; // assume the worst
242 blk = tags->findBlock(pkt->getAddr(), lat);
243
244 if (prefetchAccess) {
245 //We are determining prefetches on the access stream, so call the prefetcher
246 prefetcher->handleMiss(pkt, curTick);
247 }
248
249 DPRINTF(Cache, "%s %x %s\n", pkt->cmdString(), pkt->getAddr(),
250 (blk) ? "hit" : "miss");
251
252 if (blk != NULL) {
253 // HIT
254 if (blk->isPrefetch()) {
255 //Signal that this was a hit under prefetch (no separate check is
256 //needed; we can only get here if the block was prefetched)
257 DPRINTF(HWPrefetch, "Hit a block that was prefetched\n");
258 blk->status &= ~BlkHWPrefetched;
259 if (prefetchMiss) {
260 //If we are using the miss stream, signal the prefetcher;
261 //otherwise the access stream would have already signaled
262 //this hit
263 prefetcher->handleMiss(pkt, curTick);
264 }
265 }
266
267 if (pkt->needsExclusive() ? blk->isWritable() : blk->isValid()) {
268 // OK to satisfy access
269 hits[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++;
270 satisfied = true;
271 satisfyCpuSideRequest(pkt, blk);
272 } else {
273 // permission violation... nothing to do here, leave unsatisfied
274 // for statistics purposes this counts like a complete miss
275 incMissCount(pkt);
276 }
277 } else {
278 // complete miss (no matching block)
279 incMissCount(pkt);
280
281 if (pkt->isLocked() && pkt->isWrite()) {
282 // miss on store conditional... just give up now
283 pkt->req->setExtraData(0);
284 satisfied = true;
285 }
286 }
287
288 return satisfied;
289 }
290
291
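// Timing-mode access from the CPU side: hits are satisfied locally and
// a response is scheduled on the CPU-side port, while misses allocate
// (or attach to) an MSHR for later servicing on the memory side.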
292 template<class TagStore>
293 bool
294 Cache<TagStore>::timingAccess(PacketPtr pkt)
295 {
296 //@todo Add back in MemDebug Calls
297 // MemDebug::cacheAccess(pkt);
298
299 // we charge hitLatency for doing just about anything here
300 Tick time = curTick + hitLatency;
301
302 if (pkt->memInhibitAsserted()) {
303 DPRINTF(Cache, "mem inhibited on 0x%x: not responding\n",
304 pkt->getAddr());
305 assert(!pkt->req->isUncacheable());
306 return true;
307 }
308
309 if (pkt->req->isUncacheable()) {
310 allocateBuffer(pkt, time, true);
311 assert(pkt->needsResponse()); // else we should delete it here??
312 return true;
313 }
314
315 int lat = hitLatency;
316 bool satisfied = false;
317
318 Addr blk_addr = pkt->getAddr() & ~(Addr(blkSize-1));
319 MSHR *mshr = mshrQueue.findMatch(blk_addr);
320
321 if (!mshr) {
322 // no outstanding access to this block, look up in cache
323 // (otherwise if we allow reads while there's an outstanding
324 // write miss, the read could return stale data out of the
325 // cache block... a more aggressive system could detect the
326 // overlap (if any) and forward data out of the MSHRs, but we
327 // don't do that yet)
328 BlkType *blk = NULL;
329 satisfied = access(pkt, blk, lat);
330 }
331
332 #if 0
333 PacketList writebacks;
334
335 // If this is a block size write/hint (WH64) allocate the block here
336 // if the coherence protocol allows it.
337 /** @todo make the fast write alloc (wh64) work with coherence. */
338 /** @todo Do we want to do fast writes for writebacks as well? */
339 if (!blk && pkt->getSize() >= blkSize && coherence->allowFastWrites() &&
340 (pkt->cmd == MemCmd::WriteReq
341 || pkt->cmd == MemCmd::WriteInvalidateReq) ) {
342 // not outstanding misses, can do this
343 MSHR *outstanding_miss = mshrQueue.findMatch(pkt->getAddr());
344 if (pkt->cmd == MemCmd::WriteInvalidateReq || !outstanding_miss) {
345 if (outstanding_miss) {
346 warn("WriteInv doing a fastallocate "
347 "with an outstanding miss to the same address\n");
348 }
349 blk = handleFill(NULL, pkt, BlkValid | BlkWritable,
350 writebacks);
351 ++fastWrites;
352 }
353 }
354
355 // copy writebacks to write buffer
356 while (!writebacks.empty()) {
357 PacketPtr wbPkt = writebacks.front();
358 allocateBuffer(wbPkt, time, true);
359 writebacks.pop_front();
360 }
361 #endif
362
363 bool needsResponse = pkt->needsResponse();
364
365 if (satisfied) {
366 assert(needsResponse);
367 pkt->makeTimingResponse();
368 cpuSidePort->respond(pkt, curTick+lat);
369 } else {
370 // miss
371 if (prefetchMiss)
372 prefetcher->handleMiss(pkt, time);
373
374 if (mshr) {
375 // MSHR hit
376 //@todo remove hw_pf here
377 mshr_hits[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++;
378 if (mshr->threadNum != 0/*pkt->req->getThreadNum()*/) {
379 mshr->threadNum = -1;
380 }
381 mshr->allocateTarget(pkt, time, order++);
382 if (mshr->getNumTargets() == numTarget) {
383 noTargetMSHR = mshr;
384 setBlocked(Blocked_NoTargets);
385 // need to be careful with this... if this mshr isn't
386 // ready yet (i.e. time > curTick), we don't want to
387 // move it ahead of mshrs that are ready
388 // mshrQueue.moveToFront(mshr);
389 }
390 } else {
391 // no MSHR
392 mshr_misses[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++;
393 // always mark as cache fill for now... if we implement
394 // no-write-allocate or bypass accesses this will have to
395 // be changed.
396 allocateMissBuffer(pkt, time, true);
397 }
398 }
399
400 if (!needsResponse) {
401 // Need to clean up the packet on a writeback miss, but leave
402 // the request for the next level.
403 delete pkt;
404 }
405
406 return true;
407 }
408
409
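// Build the packet to send toward memory for a miss: an UpgradeReq if
// we already hold a shared copy and need exclusive access, otherwise
// ReadReq or ReadExReq.  Returns NULL for uncacheable requests and for
// writeback/upgrade misses that are simply forwarded.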
410 template<class TagStore>
411 PacketPtr
412 Cache<TagStore>::getBusPacket(PacketPtr cpu_pkt, BlkType *blk,
413 bool needsExclusive)
414 {
415 bool blkValid = blk && blk->isValid();
416
417 if (cpu_pkt->req->isUncacheable()) {
418 assert(blk == NULL);
419 return NULL;
420 }
421
422 if (!blkValid &&
423 (cpu_pkt->cmd == MemCmd::Writeback ||
424 cpu_pkt->cmd == MemCmd::UpgradeReq)) {
425 // For now, writebacks from upper-level caches that
426 // completely miss in the cache just go through. If we had
427 // "fast write" support (where we could write the whole
428 // block w/o fetching new data) we might want to allocate
429 // on writeback misses instead.
430 return NULL;
431 }
432
433 assert(cpu_pkt->needsResponse());
434
435 MemCmd cmd;
436 const bool useUpgrades = true;
437 if (blkValid && useUpgrades) {
438 // only reason to be here is that blk is shared
439 // (read-only) and we need exclusive
440 assert(needsExclusive && !blk->isWritable());
441 cmd = MemCmd::UpgradeReq;
442 } else {
443 // block is invalid
444 cmd = needsExclusive ? MemCmd::ReadExReq : MemCmd::ReadReq;
445 }
446 PacketPtr pkt = new Packet(cpu_pkt->req, cmd, Packet::Broadcast, blkSize);
447
448 pkt->allocate();
449 return pkt;
450 }
451
452
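// Atomic-mode access: misses are sent to the memory side via
// sendAtomic(), and any fill and resulting writebacks are performed
// inline before the accumulated latency is returned.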
453 template<class TagStore>
454 Tick
455 Cache<TagStore>::atomicAccess(PacketPtr pkt)
456 {
457 int lat = hitLatency;
458
459 if (pkt->memInhibitAsserted()) {
460 DPRINTF(Cache, "mem inhibited on 0x%x: not responding\n",
461 pkt->getAddr());
462 assert(!pkt->req->isUncacheable());
463 return lat;
464 }
465
466 // should assert here that there are no outstanding MSHRs or
467 // writebacks... that would mean that someone used an atomic
468 // access in timing mode
469
470 BlkType *blk = NULL;
471
472 if (!access(pkt, blk, lat)) {
473 // MISS
474 PacketPtr busPkt = getBusPacket(pkt, blk, pkt->needsExclusive());
475
476 bool isCacheFill = (busPkt != NULL);
477
478 if (busPkt == NULL) {
479 // just forwarding the same request to the next level
480 // no local cache operation involved
481 busPkt = pkt;
482 }
483
484 DPRINTF(Cache, "Sending an atomic %s for %x\n",
485 busPkt->cmdString(), busPkt->getAddr());
486
487 #if TRACING_ON
488 CacheBlk::State old_state = blk ? blk->status : 0;
489 #endif
490
491 lat += memSidePort->sendAtomic(busPkt);
492
493 DPRINTF(Cache, "Receive response: %s for addr %x in state %i\n",
494 busPkt->cmdString(), busPkt->getAddr(), old_state);
495
496 if (isCacheFill) {
497 PacketList writebacks;
498 blk = handleFill(busPkt, blk, writebacks);
499 satisfyCpuSideRequest(pkt, blk);
500 delete busPkt;
501
502 // Handle writebacks if needed
503 while (!writebacks.empty()){
504 PacketPtr wbPkt = writebacks.front();
505 memSidePort->sendAtomic(wbPkt);
506 writebacks.pop_front();
507 delete wbPkt;
508 }
509 }
510 }
511
512 // We now have the block one way or another (hit or completed miss)
513
514 if (pkt->needsResponse()) {
515 pkt->makeAtomicResponse();
516 }
517
518 return lat;
519 }
520
521
522 template<class TagStore>
523 void
524 Cache<TagStore>::functionalAccess(PacketPtr pkt,
525 CachePort *otherSidePort)
526 {
527 Addr blk_addr = pkt->getAddr() & ~(blkSize - 1);
528 BlkType *blk = tags->findBlock(pkt->getAddr());
529
530 if (blk && pkt->checkFunctional(blk_addr, blkSize, blk->data)) {
531 // request satisfied from block
532 return;
533 }
534
535 // Need to check for outstanding misses and writes
536
537 // There can only be one matching outstanding miss.
538 MSHR *mshr = mshrQueue.findMatch(blk_addr);
539 if (mshr) {
540 MSHR::TargetList *targets = mshr->getTargetList();
541 MSHR::TargetList::iterator i = targets->begin();
542 MSHR::TargetList::iterator end = targets->end();
543 for (; i != end; ++i) {
544 PacketPtr targetPkt = i->pkt;
545 if (pkt->checkFunctional(targetPkt))
546 return;
547 }
548 }
549
550 // There can be many matching outstanding writes; we don't forward data from them yet, so assert that none exist.
551 std::vector<MSHR*> writes;
552 assert(!writeBuffer.findMatches(blk_addr, writes));
553 /* Need to change this to iterate through targets in mshr??
554 for (int i = 0; i < writes.size(); ++i) {
555 MSHR *mshr = writes[i];
556 if (pkt->checkFunctional(mshr->addr, mshr->size, mshr->writeData))
557 return;
558 }
559 */
560
561 otherSidePort->checkAndSendFunctional(pkt);
562 }
563
564
565 /////////////////////////////////////////////////////
566 //
567 // Response handling: responses from the memory side
568 //
569 /////////////////////////////////////////////////////
570
571
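// Respond to all targets queued on an MSHR once the fill data is
// available.  Returns true if the MSHR can be deallocated, or false if
// deferred targets were promoted and it must be rescheduled.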
572 template<class TagStore>
573 bool
574 Cache<TagStore>::satisfyMSHR(MSHR *mshr, PacketPtr pkt,
575 BlkType *blk)
576 {
577 // respond to MSHR targets, if any
578
579 // First offset for critical word first calculations
580 int initial_offset = 0;
581
582 if (mshr->hasTargets()) {
583 initial_offset = mshr->getTarget()->pkt->getOffset(blkSize);
584 }
585
586 while (mshr->hasTargets()) {
587 MSHR::Target *target = mshr->getTarget();
588
589 if (target->isCpuSide()) {
590 satisfyCpuSideRequest(target->pkt, blk);
591 // How many bytes past the first request is this one
592 int transfer_offset =
593 target->pkt->getOffset(blkSize) - initial_offset;
594 if (transfer_offset < 0) {
595 transfer_offset += blkSize;
596 }
597
598 // If critical word (no offset) return first word time
599 Tick completion_time = tags->getHitLatency() +
600 (transfer_offset ? pkt->finishTime : pkt->firstWordTime);
601
602 if (!target->pkt->req->isUncacheable()) {
603 missLatency[target->pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/] +=
604 completion_time - target->recvTime;
605 }
606 target->pkt->makeTimingResponse();
607 cpuSidePort->respond(target->pkt, completion_time);
608 } else {
609 // response to snoop request
610 DPRINTF(Cache, "processing deferred snoop...\n");
611 handleSnoop(target->pkt, blk, true, true);
612 }
613
614 mshr->popTarget();
615 }
616
617 if (mshr->promoteDeferredTargets()) {
618 MSHRQueue *mq = mshr->queue;
619 mq->markPending(mshr);
620 requestMemSideBus((RequestCause)mq->index, pkt->finishTime);
621 return false;
622 }
623
624 return true;
625 }
626
627
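// Handle a response from the memory side: satisfy the MSHR's waiting
// targets (installing the block first for cache fills), queue any
// writebacks produced by the replacement, and deallocate the MSHR when
// its work is done.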
628 template<class TagStore>
629 void
630 Cache<TagStore>::handleResponse(PacketPtr pkt)
631 {
632 Tick time = curTick + hitLatency;
633 MSHR *mshr = dynamic_cast<MSHR*>(pkt->senderState);
634 assert(mshr);
635
636 if (pkt->wasNacked()) {
637 //pkt->reinitFromRequest();
638 warn("NACKs from devices not connected to the same bus "
639 "not implemented\n");
640 return;
641 }
642 assert(!pkt->isError());
643 DPRINTF(Cache, "Handling response to %x\n", pkt->getAddr());
644
645 MSHRQueue *mq = mshr->queue;
646 bool wasFull = mq->isFull();
647
648 if (mshr == noTargetMSHR) {
649 // we always clear at least one target
650 clearBlocked(Blocked_NoTargets);
651 noTargetMSHR = NULL;
652 }
653
654 // Can we deallocate MSHR when done?
655 bool deallocate = false;
656
657 // Initial target is used just for stats
658 MSHR::Target *initial_tgt = mshr->getTarget();
659 int stats_cmd_idx = initial_tgt->pkt->cmdToIndex();
660 Tick miss_latency = curTick - initial_tgt->recvTime;
661
662 if (mshr->isCacheFill) {
663 mshr_miss_latency[stats_cmd_idx][0/*pkt->req->getThreadNum()*/] +=
664 miss_latency;
665 DPRINTF(Cache, "Block for addr %x being updated in Cache\n",
666 pkt->getAddr());
667 BlkType *blk = tags->findBlock(pkt->getAddr());
668
669 // give mshr a chance to do some dirty work
670 mshr->handleFill(pkt, blk);
671
672 PacketList writebacks;
673 blk = handleFill(pkt, blk, writebacks);
674 deallocate = satisfyMSHR(mshr, pkt, blk);
675 // copy writebacks to write buffer
676 while (!writebacks.empty()) {
677 PacketPtr wbPkt = writebacks.front();
678 allocateBuffer(wbPkt, time, true);
679 writebacks.pop_front();
680 }
681 // if we used temp block, clear it out
682 if (blk == tempBlock) {
683 if (blk->isDirty()) {
684 allocateBuffer(writebackBlk(blk), time, true);
685 }
686 tags->invalidateBlk(blk);
687 }
688 } else {
689 if (pkt->req->isUncacheable()) {
690 mshr_uncacheable_lat[stats_cmd_idx][0/*pkt->req->getThreadNum()*/] +=
691 miss_latency;
692 }
693
694 while (mshr->hasTargets()) {
695 MSHR::Target *target = mshr->getTarget();
696 assert(target->isCpuSide());
697 mshr->popTarget();
698 if (pkt->isRead()) {
699 target->pkt->setData(pkt->getPtr<uint8_t>());
700 }
701 target->pkt->makeTimingResponse();
702 cpuSidePort->respond(target->pkt, time);
703 }
704 assert(!mshr->hasTargets());
705 deallocate = true;
706 }
707
708 delete pkt;
709
710 if (deallocate) {
711 mq->deallocate(mshr);
712 if (wasFull && !mq->isFull()) {
713 clearBlocked((BlockedCause)mq->index);
714 }
715 }
716 }
717
718
719
720
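// Build a Writeback packet for a dirty block, copying out its data and
// clearing the dirty bit; the caller is responsible for sending it.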
721 template<class TagStore>
722 PacketPtr
723 Cache<TagStore>::writebackBlk(BlkType *blk)
724 {
725 assert(blk && blk->isValid() && blk->isDirty());
726
727 writebacks[0/*pkt->req->getThreadNum()*/]++;
728
729 Request *writebackReq =
730 new Request(tags->regenerateBlkAddr(blk->tag, blk->set), blkSize, 0);
731 PacketPtr writeback = new Packet(writebackReq, MemCmd::Writeback, -1);
732 writeback->allocate();
733 std::memcpy(writeback->getPtr<uint8_t>(), blk->data, blkSize);
734
735 blk->status &= ~BlkDirty;
736 return writeback;
737 }
738
739
740 // Note that the reason we return a list of writebacks rather than
741 // inserting them directly in the write buffer is that this function
742 // is called by both atomic and timing-mode accesses, and in atomic
743 // mode we don't mess with the write buffer (we just perform the
744 // writebacks atomically once the original request is complete).
745 template<class TagStore>
746 typename Cache<TagStore>::BlkType*
747 Cache<TagStore>::handleFill(PacketPtr pkt, BlkType *blk,
748 PacketList &writebacks)
749 {
750 Addr addr = pkt->getAddr();
751 CacheBlk::State old_state = blk ? blk->status : 0;
752
753 if (blk == NULL) {
754 // better have read new data...
755 assert(pkt->isRead());
756
757 // need to do a replacement
758 blk = tags->findReplacement(addr, writebacks);
759 if (blk->isValid()) {
760 Addr repl_addr = tags->regenerateBlkAddr(blk->tag, blk->set);
761 MSHR *repl_mshr = mshrQueue.findMatch(repl_addr);
762 if (repl_mshr) {
763 // must be an outstanding upgrade request on block
764 // we're about to replace...
765 assert(!blk->isWritable());
766 assert(repl_mshr->needsExclusive);
767 // too hard to replace block with transient state;
768 // just use temporary storage to complete the current
769 // request and then get rid of it
770 assert(!tempBlock->isValid());
771 blk = tempBlock;
772 tempBlock->set = tags->extractSet(addr);
773 DPRINTF(Cache, "using temp block for %x\n", addr);
774 } else {
775 DPRINTF(Cache, "replacement: replacing %x with %x: %s\n",
776 repl_addr, addr,
777 blk->isDirty() ? "writeback" : "clean");
778
779 if (blk->isDirty()) {
780 // Save writeback packet for handling by caller
781 writebacks.push_back(writebackBlk(blk));
782 }
783 }
784 }
785
786 blk->tag = tags->extractTag(addr);
787 } else {
788 // existing block... probably an upgrade
789 assert(blk->tag == tags->extractTag(addr));
790 // either we're getting new data or the block should already be valid
791 assert(pkt->isRead() || blk->isValid());
792 }
793
794 if (pkt->needsExclusive()) {
795 blk->status = BlkValid | BlkWritable | BlkDirty;
796 } else if (!pkt->sharedAsserted()) {
797 blk->status = BlkValid | BlkWritable;
798 } else {
799 blk->status = BlkValid;
800 }
801
802 DPRINTF(Cache, "Block addr %x moving from state %i to %i\n",
803 addr, old_state, blk->status);
804
805 // if we got new data, copy it in
806 if (pkt->isRead()) {
807 std::memcpy(blk->data, pkt->getPtr<uint8_t>(), blkSize);
808 }
809
810 blk->whenReady = pkt->finishTime;
811
812 return blk;
813 }
814
815
816 /////////////////////////////////////////////////////
817 //
818 // Snoop path: requests coming in from the memory side
819 //
820 /////////////////////////////////////////////////////
821
822 template<class TagStore>
823 void
824 Cache<TagStore>::doTimingSupplyResponse(PacketPtr req_pkt,
825 uint8_t *blk_data,
826 bool already_copied)
827 {
828 // timing-mode snoop responses require a new packet, unless we
829 // already made a copy...
830 PacketPtr pkt = already_copied ? req_pkt : new Packet(req_pkt);
831 pkt->allocate();
832 pkt->makeTimingResponse();
833 pkt->setDataFromBlock(blk_data, blkSize);
834 memSidePort->respond(pkt, curTick + hitLatency);
835 }
836
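// Apply a snoop to the given block: supply data if we hold it dirty,
// downgrade or invalidate our copy as required, and set the shared and
// mem-inhibit flags on the packet accordingly.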
837 template<class TagStore>
838 void
839 Cache<TagStore>::handleSnoop(PacketPtr pkt, BlkType *blk,
840 bool is_timing, bool is_deferred)
841 {
842 if (!blk || !blk->isValid()) {
843 return;
844 }
845
846 // we may end up modifying both the block state and the packet (if
847 // we respond in atomic mode), so just figure out what to do now
848 // and then do it later
849 bool supply = blk->isDirty() && pkt->isRead();
850 bool invalidate = pkt->isInvalidate();
851
852 if (pkt->isRead() && !pkt->isInvalidate()) {
853 assert(!pkt->needsExclusive());
854 pkt->assertShared();
855 int bits_to_clear = BlkWritable;
856 const bool haveOwnershipState = true; // for now
857 if (!haveOwnershipState) {
858 // if we don't support pure ownership (dirty && !writable),
859 // have to clear dirty bit here, assume memory snarfs data
860 // on cache-to-cache xfer
861 bits_to_clear |= BlkDirty;
862 }
863 blk->status &= ~bits_to_clear;
864 }
865
866 if (supply) {
867 assert(!pkt->memInhibitAsserted());
868 pkt->assertMemInhibit();
869 if (is_timing) {
870 doTimingSupplyResponse(pkt, blk->data, is_deferred);
871 } else {
872 pkt->makeAtomicResponse();
873 pkt->setDataFromBlock(blk->data, blkSize);
874 }
875 }
876
877 // Do this last in case it deallocates block data or something
878 // like that
879 if (invalidate) {
880 tags->invalidateBlk(blk);
881 }
882
883 DPRINTF(Cache, "snooped a %s request for addr %x, %snew state is %i\n",
884 pkt->cmdString(), blockAlign(pkt->getAddr()),
885 supply ? "supplying data, " : "", blk->status);
886 }
887
888
889 template<class TagStore>
890 void
891 Cache<TagStore>::snoopTiming(PacketPtr pkt)
892 {
893 if (pkt->req->isUncacheable()) {
894 //Can't get a hit on an uncacheable address
895 //Revisit this for multi level coherence
896 return;
897 }
898
899 BlkType *blk = tags->findBlock(pkt->getAddr());
900
901 Addr blk_addr = pkt->getAddr() & ~(Addr(blkSize-1));
902 MSHR *mshr = mshrQueue.findMatch(blk_addr);
903 // better not be snooping a request that conflicts with something
904 // we have outstanding...
905 if (mshr && mshr->inService) {
906 DPRINTF(Cache, "Deferring snoop on in-service MSHR to blk %x\n",
907 blk_addr);
908 mshr->allocateSnoopTarget(pkt, curTick, order++);
909 if (mshr->getNumTargets() > numTarget)
910 warn("allocating bonus target for snoop"); //handle later
911 return;
912 }
913
914 //We also need to check the writeback buffers and handle those
915 std::vector<MSHR *> writebacks;
916 if (writeBuffer.findMatches(blk_addr, writebacks)) {
917 DPRINTF(Cache, "Snoop hit in writeback to addr: %x\n",
918 pkt->getAddr());
919
920 //Look through writebacks for any non-uncacheable writes and use the first match
921 for (int i=0; i<writebacks.size(); i++) {
922 mshr = writebacks[i];
923 assert(!mshr->isUncacheable());
924 assert(mshr->getNumTargets() == 1);
925 PacketPtr wb_pkt = mshr->getTarget()->pkt;
926 assert(wb_pkt->cmd == MemCmd::Writeback);
927
928 if (pkt->isRead()) {
929 assert(!pkt->memInhibitAsserted());
930 pkt->assertMemInhibit();
931 if (!pkt->needsExclusive()) {
932 pkt->assertShared();
933 } else {
934 // if we're not asserting the shared line, we need to
935 // invalidate our copy. we'll do that below as long as
936 // the packet's invalidate flag is set...
937 assert(pkt->isInvalidate());
938 }
939 doTimingSupplyResponse(pkt, wb_pkt->getPtr<uint8_t>(), false);
940 }
941
942 if (pkt->isInvalidate()) {
943 // Invalidation trumps our writeback... discard here
944 markInService(mshr);
945 }
946 return;
947 }
948 }
949
950 handleSnoop(pkt, blk, true, false);
951 }
952
953
954 template<class TagStore>
955 Tick
956 Cache<TagStore>::snoopAtomic(PacketPtr pkt)
957 {
958 if (pkt->req->isUncacheable()) {
959 // Can't get a hit on an uncacheable address
960 // Revisit this for multi level coherence
961 return hitLatency;
962 }
963
964 BlkType *blk = tags->findBlock(pkt->getAddr());
965 handleSnoop(pkt, blk, false, false);
966 return hitLatency;
967 }
968
969
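// Pick the next request to send: misses normally take priority over
// buffered writes unless the write buffer is full or ordering conflicts
// dictate otherwise; if both queues are empty, try a prefetch.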
970 template<class TagStore>
971 MSHR *
972 Cache<TagStore>::getNextMSHR()
973 {
974 // Check both MSHR queue and write buffer for potential requests
975 MSHR *miss_mshr = mshrQueue.getNextMSHR();
976 MSHR *write_mshr = writeBuffer.getNextMSHR();
977
978 // Now figure out which one to send... some cases are easy
979 if (miss_mshr && !write_mshr) {
980 return miss_mshr;
981 }
982 if (write_mshr && !miss_mshr) {
983 return write_mshr;
984 }
985
986 if (miss_mshr && write_mshr) {
987 // We have one of each... normally we favor the miss request
988 // unless the write buffer is full
989 if (writeBuffer.isFull() && writeBuffer.inServiceEntries == 0) {
990 // Write buffer is full, so we'd like to issue a write;
991 // need to search MSHR queue for conflicting earlier miss.
992 MSHR *conflict_mshr =
993 mshrQueue.findPending(write_mshr->addr, write_mshr->size);
994
995 if (conflict_mshr && conflict_mshr->order < write_mshr->order) {
996 // Service misses in order until conflict is cleared.
997 return conflict_mshr;
998 }
999
1000 // No conflicts; issue write
1001 return write_mshr;
1002 }
1003
1004 // Write buffer isn't full, but need to check it for
1005 // conflicting earlier writeback
1006 MSHR *conflict_mshr =
1007 writeBuffer.findPending(miss_mshr->addr, miss_mshr->size);
1008 if (conflict_mshr) {
1009 // not sure why we don't check order here... it was in the
1010 // original code but commented out.
1011
1012 // The only way this happens is if we are
1013 // doing a write and we didn't have permissions
1014 // then subsequently saw a writeback (owned got evicted)
1015 // We need to make sure to perform the writeback first
1016 // To preserve the dirty data, then we can issue the write
1017
1018 // should we return write_mshr here instead? I.e. do we
1019 // have to flush writes in order? I don't think so... not
1020 // for Alpha anyway. Maybe for x86?
1021 return conflict_mshr;
1022 }
1023
1024 // No conflicts; issue read
1025 return miss_mshr;
1026 }
1027
1028 // fall through... no pending requests. Try a prefetch.
1029 assert(!miss_mshr && !write_mshr);
1030 if (!mshrQueue.isFull()) {
1031 // If we have a miss queue slot, we can try a prefetch
1032 PacketPtr pkt = prefetcher->getPacket();
1033 if (pkt) {
1034 // Update statistic on number of prefetches issued
1035 // (hwpf_mshr_misses)
1036 mshr_misses[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++;
1037 // Don't request bus, since we already have it
1038 return allocateMissBuffer(pkt, curTick, false);
1039 }
1040 }
1041
1042 return NULL;
1043 }
1044
1045
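// Turn the next ready MSHR into a packet for the memory-side bus:
// either forward the target packet as-is or build a new bus packet,
// marking the MSHR as a cache fill when one will occur.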
1046 template<class TagStore>
1047 PacketPtr
1048 Cache<TagStore>::getTimingPacket()
1049 {
1050 MSHR *mshr = getNextMSHR();
1051
1052 if (mshr == NULL) {
1053 return NULL;
1054 }
1055
1056 // use request from 1st target
1057 PacketPtr tgt_pkt = mshr->getTarget()->pkt;
1058 PacketPtr pkt = NULL;
1059
1060 if (mshr->isSimpleForward()) {
1061 // no response expected, just forward packet as it is
1062 assert(tags->findBlock(mshr->addr) == NULL);
1063 pkt = tgt_pkt;
1064 } else {
1065 BlkType *blk = tags->findBlock(mshr->addr);
1066 pkt = getBusPacket(tgt_pkt, blk, mshr->needsExclusive);
1067
1068 mshr->isCacheFill = (pkt != NULL);
1069
1070 if (pkt == NULL) {
1071 // not a cache block request, but a response is expected
1072 assert(!mshr->isSimpleForward());
1073 // make copy of current packet to forward, keep current
1074 // copy for response handling
1075 pkt = new Packet(tgt_pkt);
1076 pkt->allocate();
1077 if (pkt->isWrite()) {
1078 pkt->setData(tgt_pkt->getPtr<uint8_t>());
1079 }
1080 }
1081 }
1082
1083 assert(pkt != NULL);
1084 pkt->senderState = mshr;
1085 return pkt;
1086 }
1087
1088
1089 ///////////////
1090 //
1091 // CpuSidePort
1092 //
1093 ///////////////
1094
1095 template<class TagStore>
1096 void
1097 Cache<TagStore>::CpuSidePort::
1098 getDeviceAddressRanges(AddrRangeList &resp, bool &snoop)
1099 {
1100 // CPU side port doesn't snoop; it's a target only.
1101 bool dummy;
1102 otherPort->getPeerAddressRanges(resp, dummy);
1103 snoop = false;
1104 }
1105
1106
1107 template<class TagStore>
1108 bool
1109 Cache<TagStore>::CpuSidePort::recvTiming(PacketPtr pkt)
1110 {
1111 if (pkt->isRequest() && blocked) {
1112 DPRINTF(Cache,"Scheduling a retry while blocked\n");
1113 mustSendRetry = true;
1114 return false;
1115 }
1116
1117 myCache()->timingAccess(pkt);
1118 return true;
1119 }
1120
1121
1122 template<class TagStore>
1123 Tick
1124 Cache<TagStore>::CpuSidePort::recvAtomic(PacketPtr pkt)
1125 {
1126 return myCache()->atomicAccess(pkt);
1127 }
1128
1129
1130 template<class TagStore>
1131 void
1132 Cache<TagStore>::CpuSidePort::recvFunctional(PacketPtr pkt)
1133 {
1134 checkFunctional(pkt);
1135 if (!pkt->isResponse())
1136 myCache()->functionalAccess(pkt, cache->memSidePort);
1137 }
1138
1139
1140 template<class TagStore>
1141 Cache<TagStore>::
1142 CpuSidePort::CpuSidePort(const std::string &_name,
1143 Cache<TagStore> *_cache)
1144 : BaseCache::CachePort(_name, _cache)
1145 {
1146 }
1147
1148 ///////////////
1149 //
1150 // MemSidePort
1151 //
1152 ///////////////
1153
1154 template<class TagStore>
1155 void
1156 Cache<TagStore>::MemSidePort::
1157 getDeviceAddressRanges(AddrRangeList &resp, bool &snoop)
1158 {
1159 otherPort->getPeerAddressRanges(resp, snoop);
1160 // Memory-side port always snoops, so unconditionally set flag for
1161 // caller.
1162 snoop = true;
1163 }
1164
1165
1166 template<class TagStore>
1167 bool
1168 Cache<TagStore>::MemSidePort::recvTiming(PacketPtr pkt)
1169 {
1170 // This needs to be fixed so that the cache updates the MSHR and sends the
1171 // packet back out on the link.  That case probably won't happen, though,
1172 // so until this gets fixed just panic when it does.
1173 if (pkt->wasNacked())
1174 panic("Need to implement cache resending nacked packets!\n");
1175
1176 if (pkt->isRequest() && blocked) {
1177 DPRINTF(Cache,"Scheduling a retry while blocked\n");
1178 mustSendRetry = true;
1179 return false;
1180 }
1181
1182 if (pkt->isResponse()) {
1183 myCache()->handleResponse(pkt);
1184 } else {
1185 myCache()->snoopTiming(pkt);
1186 }
1187 return true;
1188 }
1189
1190
1191 template<class TagStore>
1192 Tick
1193 Cache<TagStore>::MemSidePort::recvAtomic(PacketPtr pkt)
1194 {
1195 // in atomic mode, responses go back to the sender via the
1196 // function return from sendAtomic(), not via a separate
1197 // sendAtomic() from the responder. Thus we should never see a
1198 // response packet in recvAtomic() (anywhere, not just here).
1199 assert(!pkt->isResponse());
1200 return myCache()->snoopAtomic(pkt);
1201 }
1202
1203
1204 template<class TagStore>
1205 void
1206 Cache<TagStore>::MemSidePort::recvFunctional(PacketPtr pkt)
1207 {
1208 checkFunctional(pkt);
1209 if (!pkt->isResponse())
1210 myCache()->functionalAccess(pkt, cache->cpuSidePort);
1211 }
1212
1213
1214
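// Send the next packet on the memory-side port: queued responses take
// precedence over new requests and writebacks; on success the MSHR is
// marked in service, otherwise we wait for a retry.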
1215 template<class TagStore>
1216 void
1217 Cache<TagStore>::MemSidePort::sendPacket()
1218 {
1219 // if we have responses that are ready, they take precedence
1220 if (deferredPacketReady()) {
1221 bool success = sendTiming(transmitList.front().pkt);
1222
1223 if (success) {
1224 //send successful, remove packet
1225 transmitList.pop_front();
1226 }
1227
1228 waitingOnRetry = !success;
1229 } else {
1230 // check for non-response packets (requests & writebacks)
1231 PacketPtr pkt = myCache()->getTimingPacket();
1232 if (pkt == NULL) {
1233 // can happen if e.g. we attempt a writeback and fail, but
1234 // before the retry, the writeback is eliminated because
1235 // we snoop another cache's ReadEx.
1236 waitingOnRetry = false;
1237 } else {
1238 MSHR *mshr = dynamic_cast<MSHR*>(pkt->senderState);
1239
1240 bool success = sendTiming(pkt);
1241 DPRINTF(CachePort,
1242 "Address %x was %s in sending the timing request\n",
1243 pkt->getAddr(), success ? "successful" : "unsuccessful");
1244
1245 waitingOnRetry = !success;
1246 if (waitingOnRetry) {
1247 DPRINTF(CachePort, "now waiting on a retry\n");
1248 if (!mshr->isSimpleForward()) {
1249 delete pkt;
1250 }
1251 } else {
1252 myCache()->markInService(mshr);
1253 }
1254 }
1255 }
1256
1257
1258 // tried to send packet... if it was successful (no retry), see if
1259 // we need to rerequest bus or not
1260 if (!waitingOnRetry) {
1261 Tick nextReady = std::min(deferredPacketReadyTime(),
1262 myCache()->nextMSHRReadyTime());
1263 // @TODO: need to factor in prefetch requests here somehow
1264 if (nextReady != MaxTick) {
1265 DPRINTF(CachePort, "more packets to send @ %d\n", nextReady);
1266 sendEvent->schedule(std::max(nextReady, curTick + 1));
1267 } else {
1268 // no more to send right now: if we're draining, we may be done
1269 if (drainEvent) {
1270 drainEvent->process();
1271 drainEvent = NULL;
1272 }
1273 }
1274 }
1275 }
1276
1277 template<class TagStore>
1278 void
1279 Cache<TagStore>::MemSidePort::recvRetry()
1280 {
1281 assert(waitingOnRetry);
1282 sendPacket();
1283 }
1284
1285
1286 template<class TagStore>
1287 void
1288 Cache<TagStore>::MemSidePort::processSendEvent()
1289 {
1290 assert(!waitingOnRetry);
1291 sendPacket();
1292 }
1293
1294
1295 template<class TagStore>
1296 Cache<TagStore>::
1297 MemSidePort::MemSidePort(const std::string &_name, Cache<TagStore> *_cache)
1298 : BaseCache::CachePort(_name, _cache)
1299 {
1300 // override default send event from SimpleTimingPort
1301 delete sendEvent;
1302 sendEvent = new SendEvent(this);
1303 }