cpu-o3: Add cache read ports limit to LSQ
[gem5.git] / src / cpu / o3 / lsq_impl.hh
/*
 * Copyright (c) 2011-2012, 2014, 2017-2018 ARM Limited
 * Copyright (c) 2013 Advanced Micro Devices, Inc.
 * All rights reserved
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder. You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Copyright (c) 2005-2006 The Regents of The University of Michigan
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Korey Sewell
 */

#ifndef __CPU_O3_LSQ_IMPL_HH__
#define __CPU_O3_LSQ_IMPL_HH__

#include <algorithm>
#include <list>
#include <string>

#include "base/logging.hh"
#include "cpu/o3/lsq.hh"
#include "debug/Drain.hh"
#include "debug/Fetch.hh"
#include "debug/LSQ.hh"
#include "debug/Writeback.hh"
#include "params/DerivO3CPU.hh"

using namespace std;

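/*
 * The LSQ is a thin wrapper around the per-thread LSQ units. The
 * constructor applies the SMT sharing policy to size each thread's
 * load queue (LQ) and store queue (SQ), and records the number of
 * cache load/store ports that bound how many memory packets may be
 * sent to the dcache per cycle.
 */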
template <class Impl>
LSQ<Impl>::LSQ(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params)
    : cpu(cpu_ptr), iewStage(iew_ptr),
      _cacheBlocked(false),
      cacheStorePorts(params->cacheStorePorts), usedStorePorts(0),
      cacheLoadPorts(params->cacheLoadPorts), usedLoadPorts(0),
      lsqPolicy(params->smtLSQPolicy),
      LQEntries(params->LQEntries),
      SQEntries(params->SQEntries),
      maxLQEntries(maxLSQAllocation(lsqPolicy, LQEntries, params->numThreads,
                  params->smtLSQThreshold)),
      maxSQEntries(maxLSQAllocation(lsqPolicy, SQEntries, params->numThreads,
                  params->smtLSQThreshold)),
      numThreads(params->numThreads)
{
    assert(numThreads > 0 && numThreads <= Impl::MaxThreads);

    //**********************************************/
    //************ Handle SMT Parameters ***********/
    //**********************************************/

    /* Run SMT policy checks. */
    if (lsqPolicy == SMTQueuePolicy::Dynamic) {
        DPRINTF(LSQ, "LSQ sharing policy set to Dynamic\n");
    } else if (lsqPolicy == SMTQueuePolicy::Partitioned) {
        DPRINTF(LSQ, "LSQ sharing policy set to Partitioned: "
                "%i entries per LQ | %i entries per SQ\n",
                maxLQEntries, maxSQEntries);
    } else if (lsqPolicy == SMTQueuePolicy::Threshold) {

        assert(params->smtLSQThreshold > params->LQEntries);
        assert(params->smtLSQThreshold > params->SQEntries);

        DPRINTF(LSQ, "LSQ sharing policy set to Threshold: "
                "%i entries per LQ | %i entries per SQ\n",
                maxLQEntries, maxSQEntries);
    } else {
        panic("Invalid LSQ sharing policy. Options are: Dynamic, "
              "Partitioned, Threshold");
    }

    thread.reserve(numThreads);
    for (ThreadID tid = 0; tid < numThreads; tid++) {
        thread.emplace_back(maxLQEntries, maxSQEntries);
        thread[tid].init(cpu, iew_ptr, params, this, tid);
        thread[tid].setDcachePort(&cpu_ptr->getDataPort());
    }
}


template<class Impl>
std::string
LSQ<Impl>::name() const
{
    return iewStage->name() + ".lsq";
}

template<class Impl>
void
LSQ<Impl>::regStats()
{
    // Register per-thread LSQ statistics.
    for (ThreadID tid = 0; tid < numThreads; tid++) {
        thread[tid].regStats();
    }
}

template<class Impl>
void
LSQ<Impl>::setActiveThreads(list<ThreadID> *at_ptr)
{
    activeThreads = at_ptr;
    assert(activeThreads != nullptr);
}

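/*
 * Draining: the LSQ is considered drained once every thread's LQ and
 * SQ are empty. drainSanityCheck() asserts this and then lets each
 * LSQ unit verify its own internal state.
 */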
template <class Impl>
void
LSQ<Impl>::drainSanityCheck() const
{
    assert(isDrained());

    for (ThreadID tid = 0; tid < numThreads; tid++)
        thread[tid].drainSanityCheck();
}

template <class Impl>
bool
LSQ<Impl>::isDrained() const
{
    bool drained(true);

    if (!lqEmpty()) {
        DPRINTF(Drain, "Not drained, LQ not empty.\n");
        drained = false;
    }

    if (!sqEmpty()) {
        DPRINTF(Drain, "Not drained, SQ not empty.\n");
        drained = false;
    }

    return drained;
}

template <class Impl>
void
LSQ<Impl>::takeOverFrom()
{
    usedStorePorts = 0;
    usedLoadPorts = 0;
    _cacheBlocked = false;

    for (ThreadID tid = 0; tid < numThreads; tid++) {
        thread[tid].takeOverFrom();
    }
}

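/*
 * Called once per cycle. The used-port counters are consumed by
 * cachePortBusy() during the cycle and cleared here, so every cycle
 * starts with the full complement of cache load/store ports. If the
 * load ports were exhausted last cycle (as opposed to the cache
 * itself blocking), instructions waiting on a port are told to retry
 * now.
 */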
template <class Impl>
void
LSQ<Impl>::tick()
{
    // Re-issue loads which got blocked on the per-cycle load ports limit.
    if (usedLoadPorts == cacheLoadPorts && !_cacheBlocked)
        iewStage->cacheUnblocked();

    usedLoadPorts = 0;
    usedStorePorts = 0;
}

template<class Impl>
bool
LSQ<Impl>::cacheBlocked() const
{
    return _cacheBlocked;
}

template<class Impl>
void
LSQ<Impl>::cacheBlocked(bool v)
{
    _cacheBlocked = v;
}

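/*
 * Cache port accounting protocol: a caller first checks
 * cachePortAvailable(is_load) and, if a port is free, reserves it
 * with cachePortBusy(is_load) before sending the packet. A minimal
 * sketch of the expected call sequence (the surrounding issue logic
 * lives in the LSQ units):
 *
 *     if (lsq->cachePortAvailable(true)) {  // true == load port
 *         lsq->cachePortBusy(true);         // consume one load port
 *         // ... send the read packet to the dcache ...
 *     } else {
 *         // stall; tick() resets the counters next cycle
 *     }
 */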
template<class Impl>
bool
LSQ<Impl>::cachePortAvailable(bool is_load) const
{
    bool ret;
    if (is_load) {
        ret = usedLoadPorts < cacheLoadPorts;
    } else {
        ret = usedStorePorts < cacheStorePorts;
    }
    return ret;
}

template<class Impl>
void
LSQ<Impl>::cachePortBusy(bool is_load)
{
    assert(cachePortAvailable(is_load));
    if (is_load) {
        usedLoadPorts++;
    } else {
        usedStorePorts++;
    }
}

template<class Impl>
void
LSQ<Impl>::insertLoad(const DynInstPtr &load_inst)
{
    ThreadID tid = load_inst->threadNumber;

    thread[tid].insertLoad(load_inst);
}

template<class Impl>
void
LSQ<Impl>::insertStore(const DynInstPtr &store_inst)
{
    ThreadID tid = store_inst->threadNumber;

    thread[tid].insertStore(store_inst);
}

template<class Impl>
Fault
LSQ<Impl>::executeLoad(const DynInstPtr &inst)
{
    ThreadID tid = inst->threadNumber;

    return thread[tid].executeLoad(inst);
}

template<class Impl>
Fault
LSQ<Impl>::executeStore(const DynInstPtr &inst)
{
    ThreadID tid = inst->threadNumber;

    return thread[tid].executeStore(inst);
}

template<class Impl>
void
LSQ<Impl>::writebackStores()
{
    list<ThreadID>::iterator threads = activeThreads->begin();
    list<ThreadID>::iterator end = activeThreads->end();

    while (threads != end) {
        ThreadID tid = *threads++;

        if (numStoresToWB(tid) > 0) {
            DPRINTF(Writeback, "[tid:%i] Writing back stores. %i stores "
                    "available for Writeback.\n", tid, numStoresToWB(tid));
        }

        thread[tid].writebackStores();
    }
}

template<class Impl>
bool
LSQ<Impl>::violation()
{
    /* Returns true if any thread has a memory-order violation. */
    list<ThreadID>::iterator threads = activeThreads->begin();
    list<ThreadID>::iterator end = activeThreads->end();

    while (threads != end) {
        ThreadID tid = *threads++;

        if (thread[tid].violation())
            return true;
    }

    return false;
}

template <class Impl>
void
LSQ<Impl>::recvReqRetry()
{
    iewStage->cacheUnblocked();
    cacheBlocked(false);

    for (ThreadID tid : *activeThreads) {
        thread[tid].recvRetry();
    }
}

template <class Impl>
void
LSQ<Impl>::completeDataAccess(PacketPtr pkt)
{
    auto senderState = dynamic_cast<LSQSenderState*>(pkt->senderState);
    thread[cpu->contextToThread(senderState->contextId())]
        .completeDataAccess(pkt);
}

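/*
 * Response path from the dcache. The sender state attached to the
 * packet identifies the originating LSQRequest and thread. Responses
 * that also carry an invalidation (e.g. ReadRespWithInvalidate) are
 * additionally run through every thread's snoop check.
 */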
template <class Impl>
bool
LSQ<Impl>::recvTimingResp(PacketPtr pkt)
{
    if (pkt->isError())
        DPRINTF(LSQ, "Got error packet back for address: %#X\n",
                pkt->getAddr());

    auto senderState = dynamic_cast<LSQSenderState*>(pkt->senderState);
    panic_if(!senderState, "Got packet back with unknown sender state\n");

    thread[cpu->contextToThread(senderState->contextId())].recvTimingResp(pkt);

    if (pkt->isInvalidate()) {
        // This response also contains an invalidate; e.g. this can be the
        // case if cmd is ReadRespWithInvalidate.
        //
        // The calling order between completeDataAccess and checkSnoop
        // matters. By calling checkSnoop after completeDataAccess, we ensure
        // that the fault set by checkSnoop is not lost. Calling writeback
        // (more specifically inst->completeAcc) in completeDataAccess
        // overwrites fault, and in case this instruction requires squashing
        // (as determined by checkSnoop), the ReExec fault set by checkSnoop
        // would be lost otherwise.

        DPRINTF(LSQ, "received invalidation with response for addr:%#x\n",
                pkt->getAddr());

        for (ThreadID tid = 0; tid < numThreads; tid++) {
            thread[tid].checkSnoop(pkt);
        }
    }
    // Update the LSQRequest state (this may delete the request)
    senderState->request()->packetReplied();

    return true;
}

template <class Impl>
void
LSQ<Impl>::recvTimingSnoopReq(PacketPtr pkt)
{
    DPRINTF(LSQ, "received pkt for addr:%#x %s\n", pkt->getAddr(),
            pkt->cmdString());

    // must be a snoop
    if (pkt->isInvalidate()) {
        DPRINTF(LSQ, "received invalidation for addr:%#x\n",
                pkt->getAddr());
        for (ThreadID tid = 0; tid < numThreads; tid++) {
            thread[tid].checkSnoop(pkt);
        }
    }
}

template<class Impl>
int
LSQ<Impl>::getCount()
{
    unsigned total = 0;

    list<ThreadID>::iterator threads = activeThreads->begin();
    list<ThreadID>::iterator end = activeThreads->end();

    while (threads != end) {
        ThreadID tid = *threads++;

        total += getCount(tid);
    }

    return total;
}

template<class Impl>
int
LSQ<Impl>::numLoads()
{
    unsigned total = 0;

    list<ThreadID>::iterator threads = activeThreads->begin();
    list<ThreadID>::iterator end = activeThreads->end();

    while (threads != end) {
        ThreadID tid = *threads++;

        total += numLoads(tid);
    }

    return total;
}

template<class Impl>
int
LSQ<Impl>::numStores()
{
    unsigned total = 0;

    list<ThreadID>::iterator threads = activeThreads->begin();
    list<ThreadID>::iterator end = activeThreads->end();

    while (threads != end) {
        ThreadID tid = *threads++;

        total += thread[tid].numStores();
    }

    return total;
}

template<class Impl>
unsigned
LSQ<Impl>::numFreeLoadEntries()
{
    unsigned total = 0;

    list<ThreadID>::iterator threads = activeThreads->begin();
    list<ThreadID>::iterator end = activeThreads->end();

    while (threads != end) {
        ThreadID tid = *threads++;

        total += thread[tid].numFreeLoadEntries();
    }

    return total;
}

template<class Impl>
unsigned
LSQ<Impl>::numFreeStoreEntries()
{
    unsigned total = 0;

    list<ThreadID>::iterator threads = activeThreads->begin();
    list<ThreadID>::iterator end = activeThreads->end();

    while (threads != end) {
        ThreadID tid = *threads++;

        total += thread[tid].numFreeStoreEntries();
    }

    return total;
}

template<class Impl>
unsigned
LSQ<Impl>::numFreeLoadEntries(ThreadID tid)
{
    return thread[tid].numFreeLoadEntries();
}

template<class Impl>
unsigned
LSQ<Impl>::numFreeStoreEntries(ThreadID tid)
{
    return thread[tid].numFreeStoreEntries();
}

template<class Impl>
bool
LSQ<Impl>::isFull()
{
    list<ThreadID>::iterator threads = activeThreads->begin();
    list<ThreadID>::iterator end = activeThreads->end();

    while (threads != end) {
        ThreadID tid = *threads++;

        if (!(thread[tid].lqFull() || thread[tid].sqFull()))
            return false;
    }

    return true;
}

template<class Impl>
bool
LSQ<Impl>::isFull(ThreadID tid)
{
    //@todo: Change to Calculate All Entries for
    //Dynamic Policy
    if (lsqPolicy == SMTQueuePolicy::Dynamic)
        return isFull();
    else
        return thread[tid].lqFull() || thread[tid].sqFull();
}

template<class Impl>
bool
LSQ<Impl>::isEmpty() const
{
    return lqEmpty() && sqEmpty();
}

template<class Impl>
bool
LSQ<Impl>::lqEmpty() const
{
    list<ThreadID>::const_iterator threads = activeThreads->begin();
    list<ThreadID>::const_iterator end = activeThreads->end();

    while (threads != end) {
        ThreadID tid = *threads++;

        if (!thread[tid].lqEmpty())
            return false;
    }

    return true;
}

template<class Impl>
bool
LSQ<Impl>::sqEmpty() const
{
    list<ThreadID>::const_iterator threads = activeThreads->begin();
    list<ThreadID>::const_iterator end = activeThreads->end();

    while (threads != end) {
        ThreadID tid = *threads++;

        if (!thread[tid].sqEmpty())
            return false;
    }

    return true;
}

template<class Impl>
bool
LSQ<Impl>::lqFull()
{
    list<ThreadID>::iterator threads = activeThreads->begin();
    list<ThreadID>::iterator end = activeThreads->end();

    while (threads != end) {
        ThreadID tid = *threads++;

        if (!thread[tid].lqFull())
            return false;
    }

    return true;
}

template<class Impl>
bool
LSQ<Impl>::lqFull(ThreadID tid)
{
    //@todo: Change to Calculate All Entries for
    //Dynamic Policy
    if (lsqPolicy == SMTQueuePolicy::Dynamic)
        return lqFull();
    else
        return thread[tid].lqFull();
}

template<class Impl>
bool
LSQ<Impl>::sqFull()
{
    list<ThreadID>::iterator threads = activeThreads->begin();
    list<ThreadID>::iterator end = activeThreads->end();

    while (threads != end) {
        ThreadID tid = *threads++;

        if (!sqFull(tid))
            return false;
    }

    return true;
}

template<class Impl>
bool
LSQ<Impl>::sqFull(ThreadID tid)
{
    //@todo: Change to Calculate All Entries for
    //Dynamic Policy
    if (lsqPolicy == SMTQueuePolicy::Dynamic)
        return sqFull();
    else
        return thread[tid].sqFull();
}

template<class Impl>
bool
LSQ<Impl>::isStalled()
{
    list<ThreadID>::iterator threads = activeThreads->begin();
    list<ThreadID>::iterator end = activeThreads->end();

    while (threads != end) {
        ThreadID tid = *threads++;

        if (!thread[tid].isStalled())
            return false;
    }

    return true;
}

template<class Impl>
bool
LSQ<Impl>::isStalled(ThreadID tid)
{
    if (lsqPolicy == SMTQueuePolicy::Dynamic)
        return isStalled();
    else
        return thread[tid].isStalled();
}

template<class Impl>
bool
LSQ<Impl>::hasStoresToWB()
{
    list<ThreadID>::iterator threads = activeThreads->begin();
    list<ThreadID>::iterator end = activeThreads->end();

    while (threads != end) {
        ThreadID tid = *threads++;

        if (hasStoresToWB(tid))
            return true;
    }

    return false;
}

template<class Impl>
bool
LSQ<Impl>::willWB()
{
    list<ThreadID>::iterator threads = activeThreads->begin();
    list<ThreadID>::iterator end = activeThreads->end();

    while (threads != end) {
        ThreadID tid = *threads++;

        if (willWB(tid))
            return true;
    }

    return false;
}

template<class Impl>
void
LSQ<Impl>::dumpInsts() const
{
    list<ThreadID>::const_iterator threads = activeThreads->begin();
    list<ThreadID>::const_iterator end = activeThreads->end();

    while (threads != end) {
        ThreadID tid = *threads++;

        thread[tid].dumpInsts();
    }
}

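/*
 * Helpers for splitting accesses at cache-line boundaries. Worked
 * example with a 64-byte (0x40) block size: an 8-byte access at
 * address 0x7C has addrBlockOffset(0x7C, 64) == 0x3C, and since
 * 0x3C + 8 == 0x44 > 0x40 the access crosses a line boundary, so
 * transferNeedsBurst() returns true and the request must be split.
 */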
static Addr
addrBlockOffset(Addr addr, unsigned int block_size)
{
    return addr & (block_size - 1);
}

static Addr
addrBlockAlign(Addr addr, uint64_t block_size)
{
    return addr & ~(block_size - 1);
}

static bool
transferNeedsBurst(Addr addr, uint64_t size, uint64_t block_size)
{
    return (addrBlockOffset(addr, block_size) + size) > block_size;
}

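/*
 * Entry point for loads, stores and atomics coming from the CPU.
 * The request is wrapped in a SingleDataRequest, or a
 * SplitDataRequest when it crosses a cache-line boundary, and sent
 * for address translation. Once translation completes, the access is
 * handed to cpu->read()/cpu->write() through the LQ/SQ entry.
 */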
template<class Impl>
Fault
LSQ<Impl>::pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data,
                       unsigned int size, Addr addr, Request::Flags flags,
                       uint64_t *res, AtomicOpFunctor *amo_op)
{
    // The incoming request can be a load, a store, or an atomic.
    // An atomic request carries a pointer to its atomic memory
    // operation.
    bool isAtomic M5_VAR_USED = !isLoad && amo_op;

    ThreadID tid = cpu->contextToThread(inst->contextId());
    auto cacheLineSize = cpu->cacheLineSize();
    bool needs_burst = transferNeedsBurst(addr, size, cacheLineSize);
    LSQRequest* req = nullptr;

    // Atomic requests that access data across a cache line boundary are
    // currently not allowed since the cache does not guarantee that the
    // corresponding atomic memory operations execute atomically across a
    // cache line. For ISAs such as x86 that support cross-cache-line
    // atomic instructions, the cache needs to be modified to perform an
    // atomic update to both cache lines. For now, such cross-line updates
    // are not supported.
    assert(!isAtomic || !needs_burst);

    if (inst->translationStarted()) {
        req = inst->savedReq;
        assert(req);
    } else {
        if (needs_burst) {
            req = new SplitDataRequest(&thread[tid], inst, isLoad, addr,
                    size, flags, data, res);
        } else {
            req = new SingleDataRequest(&thread[tid], inst, isLoad, addr,
                    size, flags, data, res, amo_op);
        }
        assert(req);
        inst->setRequest();
        req->taskId(cpu->taskId());

        req->initiateTranslation();
    }

    /* This is the place where instructions get the effAddr. */
    if (req->isTranslationComplete()) {
        if (inst->getFault() == NoFault) {
            inst->effAddr = req->getVaddr();
            inst->effSize = size;
            inst->effAddrValid(true);

            if (cpu->checker) {
                inst->reqToVerify = std::make_shared<Request>(*req->request());
            }
            if (isLoad)
                inst->getFault() = cpu->read(req, inst->lqIdx);
            else
                inst->getFault() = cpu->write(req, data, inst->sqIdx);
        } else if (isLoad) {
            // Commit will have to clean up whatever happened. Set this
            // instruction as executed.
            inst->setExecuted();
        }
    }

    if (inst->traceData)
        inst->traceData->setMem(addr, size, flags);

    return inst->getFault();
}

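/*
 * Translation callbacks. finish() is invoked by the TLB once a
 * fragment's translation completes: the single-request variant has
 * exactly one fragment, while the split variant waits until all
 * fragments have translated before deciding between the Request and
 * Fault states.
 */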
template<class Impl>
void
LSQ<Impl>::SingleDataRequest::finish(const Fault &fault, const RequestPtr &req,
        ThreadContext* tc, BaseTLB::Mode mode)
{
    _fault.push_back(fault);
    numInTranslationFragments = 0;
    numTranslatedFragments = 1;
    /* If the instruction has been squashed, let the request know
     * as it may have to self-destruct. */
    if (_inst->isSquashed()) {
        this->squashTranslation();
    } else {
        _inst->strictlyOrdered(req->isStrictlyOrdered());

        flags.set(Flag::TranslationFinished);
        if (fault == NoFault) {
            _inst->physEffAddr = req->getPaddr();
            _inst->memReqFlags = req->getFlags();
            if (req->isCondSwap()) {
                assert(_res);
                req->setExtraData(*_res);
            }
            setState(State::Request);
        } else {
            setState(State::Fault);
        }

        LSQRequest::_inst->fault = fault;
        LSQRequest::_inst->translationCompleted(true);
    }
}

template<class Impl>
void
LSQ<Impl>::SplitDataRequest::finish(const Fault &fault, const RequestPtr &req,
        ThreadContext* tc, BaseTLB::Mode mode)
{
    _fault.push_back(fault);
    assert(req == _requests[numTranslatedFragments] || this->isDelayed());

    numInTranslationFragments--;
    numTranslatedFragments++;

    mainReq->setFlags(req->getFlags());

    if (numTranslatedFragments == _requests.size()) {
        if (_inst->isSquashed()) {
            this->squashTranslation();
        } else {
            _inst->strictlyOrdered(mainReq->isStrictlyOrdered());
            flags.set(Flag::TranslationFinished);
            auto fault_it = _fault.begin();
            /* Fast-forward past the NoFaults to the first fault, if any. */
            while (fault_it != _fault.end() && *fault_it == NoFault)
                fault_it++;
            /* If none of the fragments faulted: */
            if (fault_it == _fault.end()) {
                _inst->physEffAddr = request(0)->getPaddr();

                _inst->memReqFlags = mainReq->getFlags();
                if (mainReq->isCondSwap()) {
                    assert(_res);
                    mainReq->setExtraData(*_res);
                }
                setState(State::Request);
                _inst->fault = NoFault;
            } else {
                setState(State::Fault);
                _inst->fault = *fault_it;
            }
            _inst->translationCompleted(true);
        }
    }
}

template<class Impl>
void
LSQ<Impl>::SingleDataRequest::initiateTranslation()
{
    _inst->translationStarted(true);
    setState(State::Translation);
    flags.set(Flag::TranslationStarted);

    _inst->savedReq = this;
    sendFragmentToTranslation(0);
}

template<class Impl>
PacketPtr
LSQ<Impl>::SplitDataRequest::mainPacket()
{
    return _mainPacket;
}

template<class Impl>
RequestPtr
LSQ<Impl>::SplitDataRequest::mainRequest()
{
    return mainReq;
}

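/*
 * A split access is broken into at most three kinds of fragments: a
 * possibly unaligned prefix up to the next cache-line boundary, zero
 * or more whole cache lines, and a possibly short tail. A separate
 * mainReq covers the whole virtual range and is used to build the
 * main (assembled) packet once the fragments complete.
 */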
template<class Impl>
void
LSQ<Impl>::SplitDataRequest::initiateTranslation()
{
    _inst->translationStarted(true);
    setState(State::Translation);
    flags.set(Flag::TranslationStarted);

    unsigned int cacheLineSize = _port.cacheLineSize();
    Addr base_addr = _addr;
    Addr next_addr = addrBlockAlign(_addr + cacheLineSize, cacheLineSize);
    Addr final_addr = addrBlockAlign(_addr + _size, cacheLineSize);
    uint32_t size_so_far = 0;

    mainReq = std::make_shared<Request>(_inst->getASID(), base_addr,
            _size, _flags, _inst->masterId(),
            _inst->instAddr(), _inst->contextId());

    // Paddr is not used in mainReq. However, we will accumulate the flags
    // from the sub requests into mainReq by calling setFlags() in finish().
    // setFlags() assumes that paddr is set so flip the paddr valid bit here
    // to avoid a potential assert in setFlags() when we call it from
    // finish().
    mainReq->setPaddr(0);

    /* Get the pre-fix, possibly unaligned. */
    _requests.push_back(std::make_shared<Request>(_inst->getASID(), base_addr,
            next_addr - base_addr, _flags, _inst->masterId(),
            _inst->instAddr(), _inst->contextId()));
    size_so_far = next_addr - base_addr;

    /* We are block aligned now, reading whole blocks. */
    base_addr = next_addr;
    while (base_addr != final_addr) {
        _requests.push_back(std::make_shared<Request>(_inst->getASID(),
                base_addr, cacheLineSize, _flags, _inst->masterId(),
                _inst->instAddr(), _inst->contextId()));
        size_so_far += cacheLineSize;
        base_addr += cacheLineSize;
    }

    /* Deal with the tail. */
    if (size_so_far < _size) {
        _requests.push_back(std::make_shared<Request>(_inst->getASID(),
                base_addr, _size - size_so_far, _flags, _inst->masterId(),
                _inst->instAddr(), _inst->contextId()));
    }

    /* Setup the requests and send them to translation. */
    for (auto& r: _requests) {
        r->setReqInstSeqNum(_inst->seqNum);
        r->taskId(_taskId);
    }
    this->_inst->savedReq = this;
    numInTranslationFragments = 0;
    numTranslatedFragments = 0;

    for (uint32_t i = 0; i < _requests.size(); i++) {
        sendFragmentToTranslation(i);
    }
}

template<class Impl>
void
LSQ<Impl>::LSQRequest::sendFragmentToTranslation(int i)
{
    numInTranslationFragments++;
    _port.dTLB()->translateTiming(
            this->request(i),
            this->_inst->thread->getTC(), this,
            this->isLoad() ? BaseTLB::Read : BaseTLB::Write);
}

template<class Impl>
bool
LSQ<Impl>::SingleDataRequest::recvTimingResp(PacketPtr pkt)
{
    assert(_numOutstandingPackets == 1);
    auto state = dynamic_cast<LSQSenderState*>(pkt->senderState);
    setState(State::Complete);
    flags.set(Flag::Complete);
    state->outstanding--;
    assert(pkt == _packets.front());
    _port.completeDataAccess(pkt);
    return true;
}

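/*
 * For a split access, per-fragment responses are only counted here;
 * the access completes once the last fragment arrives, at which
 * point a fresh "main" packet covering the whole range is assembled
 * and handed to completeDataAccess().
 */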
template<class Impl>
bool
LSQ<Impl>::SplitDataRequest::recvTimingResp(PacketPtr pkt)
{
    auto state = dynamic_cast<LSQSenderState*>(pkt->senderState);
    uint32_t pktIdx = 0;
    while (pktIdx < _packets.size() && pkt != _packets[pktIdx])
        pktIdx++;
    assert(pktIdx < _packets.size());
    assert(pkt->req == _requests[pktIdx]);
    assert(pkt == _packets[pktIdx]);
    numReceivedPackets++;
    state->outstanding--;
    if (numReceivedPackets == _packets.size()) {
        setState(State::Complete);
        flags.set(Flag::Complete);
        /* Assemble packets. */
        PacketPtr resp = isLoad()
            ? Packet::createRead(mainReq)
            : Packet::createWrite(mainReq);
        if (isLoad())
            resp->dataStatic(_inst->memData);
        else
            resp->dataStatic(_data);
        resp->senderState = _senderState;
        _port.completeDataAccess(resp);
        delete resp;
    }
    return true;
}

template<class Impl>
void
LSQ<Impl>::SingleDataRequest::buildPackets()
{
    assert(_senderState);
    /* Retries do not create new packets. */
    if (_packets.size() == 0) {
        _packets.push_back(
                isLoad()
                    ? Packet::createRead(request())
                    : Packet::createWrite(request()));
        _packets.back()->dataStatic(_inst->memData);
        _packets.back()->senderState = _senderState;
    }
    assert(_packets.size() == 1);
}

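/*
 * Builds one packet per fragment (plus, for loads, the main packet
 * used to return the assembled data). Loads point each fragment
 * straight into the instruction's data buffer at the right offset;
 * stores copy the relevant slice of the data out into a buffer owned
 * by the packet.
 */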
template<class Impl>
void
LSQ<Impl>::SplitDataRequest::buildPackets()
{
    ptrdiff_t offset = 0;
    /* Retries do not create new packets. */
    if (_packets.size() == 0) {
        /* First time through: create the fragment packets. */
        if (isLoad()) {
            _mainPacket = Packet::createRead(mainReq);
            _mainPacket->dataStatic(_inst->memData);
        }
        for (auto& r: _requests) {
            PacketPtr pkt = isLoad() ? Packet::createRead(r)
                                     : Packet::createWrite(r);
            if (isLoad()) {
                pkt->dataStatic(_inst->memData + offset);
            } else {
                uint8_t* req_data = new uint8_t[r->getSize()];
                std::memcpy(req_data,
                        _inst->memData + offset,
                        r->getSize());
                pkt->dataDynamic(req_data);
            }
            offset += r->getSize();
            pkt->senderState = _senderState;
            _packets.push_back(pkt);
        }
    }
    assert(_packets.size() == _requests.size());
}

template<class Impl>
void
LSQ<Impl>::SingleDataRequest::sendPacketToCache()
{
    assert(_numOutstandingPackets == 0);
    if (lsqUnit()->trySendPacket(isLoad(), _packets.at(0)))
        _numOutstandingPackets = 1;
}

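/*
 * A split request issues as many of its fragments as the cache (and
 * the per-cycle port limits enforced by trySendPacket) will accept;
 * any remainder is retried on a later call.
 */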
template<class Impl>
void
LSQ<Impl>::SplitDataRequest::sendPacketToCache()
{
    /* Try to send the packets. */
    while (numReceivedPackets + _numOutstandingPackets < _packets.size() &&
            lsqUnit()->trySendPacket(isLoad(),
                _packets.at(numReceivedPackets + _numOutstandingPackets))) {
        _numOutstandingPackets++;
    }
}

template<class Impl>
void
LSQ<Impl>::SingleDataRequest::handleIprWrite(ThreadContext *thread,
                                             PacketPtr pkt)
{
    TheISA::handleIprWrite(thread, pkt);
}

template<class Impl>
void
LSQ<Impl>::SplitDataRequest::handleIprWrite(ThreadContext *thread,
                                            PacketPtr mainPkt)
{
    unsigned offset = 0;
    for (auto r: _requests) {
        PacketPtr pkt = new Packet(r, MemCmd::WriteReq);
        pkt->dataStatic(mainPkt->getPtr<uint8_t>() + offset);
        TheISA::handleIprWrite(thread, pkt);
        offset += r->getSize();
        delete pkt;
    }
}

template<class Impl>
Cycles
LSQ<Impl>::SingleDataRequest::handleIprRead(ThreadContext *thread,
                                            PacketPtr pkt)
{
    return TheISA::handleIprRead(thread, pkt);
}

template<class Impl>
Cycles
LSQ<Impl>::SplitDataRequest::handleIprRead(ThreadContext *thread,
                                           PacketPtr mainPkt)
{
    Cycles delay(0);
    unsigned offset = 0;

    for (auto r: _requests) {
        PacketPtr pkt = new Packet(r, MemCmd::ReadReq);
        pkt->dataStatic(mainPkt->getPtr<uint8_t>() + offset);
        Cycles d = TheISA::handleIprRead(thread, pkt);
        if (d > delay)
            delay = d;
        offset += r->getSize();
        delete pkt;
    }
    return delay;
}

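/*
 * Block-hit helpers: a request overlaps a cache block if any of its
 * (translated) fragments' physical addresses fall in the block given
 * by blockAddr/blockMask.  The LSQ units use this when checking
 * snooped invalidations against in-flight accesses.
 */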
template<class Impl>
bool
LSQ<Impl>::SingleDataRequest::isCacheBlockHit(Addr blockAddr, Addr blockMask)
{
    return ((LSQRequest::_requests[0]->getPaddr() & blockMask) == blockAddr);
}

template<class Impl>
bool
LSQ<Impl>::SplitDataRequest::isCacheBlockHit(Addr blockAddr, Addr blockMask)
{
    bool is_hit = false;
    for (auto &r: _requests) {
        if ((r->getPaddr() & blockMask) == blockAddr) {
            is_hit = true;
            break;
        }
    }
    return is_hit;
}

#endif // __CPU_O3_LSQ_IMPL_HH__