src/cpu/o3/lsq.hh

   1 /*
   2  * Copyright (c) 2011-2012, 2014, 2018 ARM Limited
   3  * Copyright (c) 2013 Advanced Micro Devices, Inc.
   4  * All rights reserved
   5  *
   6  * The license below extends only to copyright in the software and shall
   7  * not be construed as granting a license to any other intellectual
   8  * property including but not limited to intellectual property relating
   9  * to a hardware implementation of the functionality of the software
  10  * licensed hereunder.  You may use the software subject to the license
  11  * terms below provided that you ensure that this notice is replicated
  12  * unmodified and in its entirety in all distributions of the software,
  13  * modified or unmodified, in source code or in binary form.
  14  *
  15  * Copyright (c) 2004-2006 The Regents of The University of Michigan
  16  * All rights reserved.
  17  *
  18  * Redistribution and use in source and binary forms, with or without
  19  * modification, are permitted provided that the following conditions are
  20  * met: redistributions of source code must retain the above copyright
  21  * notice, this list of conditions and the following disclaimer;
  22  * redistributions in binary form must reproduce the above copyright
  23  * notice, this list of conditions and the following disclaimer in the
  24  * documentation and/or other materials provided with the distribution;
  25  * neither the name of the copyright holders nor the names of its
  26  * contributors may be used to endorse or promote products derived from
  27  * this software without specific prior written permission.
  28  *
  29  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  30  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  31  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  32  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  33  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  34  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  35  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  36  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  37  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  38  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  39  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  40  *
  41  * Authors: Korey Sewell
  42  */
  43
  44 #ifndef __CPU_O3_LSQ_HH__
  45 #define __CPU_O3_LSQ_HH__
  46
  47 #include <map>
  48 #include <queue>
  49
  50 #include "arch/generic/tlb.hh"
  51 #include "cpu/inst_seq.hh"
  52 #include "cpu/o3/lsq_unit.hh"
  53 #include "cpu/utils.hh"
  54 #include "enums/SMTQueuePolicy.hh"
  55 #include "mem/port.hh"
  56 #include "sim/sim_object.hh"
  57
  58 struct DerivO3CPUParams;
  59
  60 template <class Impl>
  61 class FullO3CPU;
  62
  63 template <class Impl>
  64 class LSQ
  65
  66 {
  67   public:
  68     using O3CPU = typename Impl::O3CPU;
  69     using DynInstPtr = typename Impl::DynInstPtr;
  70     using IEW = typename Impl::CPUPol::IEW;
  71     using LSQUnit = typename Impl::CPUPol::LSQUnit;
  72
  73     class LSQRequest;
  74     /** Sender state the LSQ attaches to the packets it sends out. */
  75     class LSQSenderState : public Packet::SenderState
  76     {
  77       protected:
  78         /** Back-pointer to the LSQRequest that owns this sender state. */
  79         LSQRequest* _request;
  80
  81         /** Protected constructor; needWB starts out equal to isLoad_. */
  82         LSQSenderState(LSQRequest* request, bool isLoad_)
  83             : _request(request), mainPkt(nullptr),
  84               pendingPacket(nullptr), outstanding(0), isLoad(isLoad_),
  85               needWB(isLoad_), isSplit(false), pktToSend(false), deleted(false)
  86         {}
  87       public:
  88
  89         /** The instruction that started this memory access. */
  90         DynInstPtr inst;
  91         /** Main packet of a split load; used during writeback. */
  92         PacketPtr mainPkt;
  93         /** Second packet of a split store that still has to be sent. */
  94         PacketPtr pendingPacket;
  95         /** How many packets are still outstanding. */
  96         uint8_t outstanding;
  97         /** True when the access is a load. */
  98         bool isLoad;
  99         /** True when the instruction will need to write back. */
 100         bool needWB;
 101         /** True when the access is split in two. */
 102         bool isSplit;
 103         /** True when a packet is waiting to be sent. */
 104         bool pktToSend;
 105         /** Set once the owning request has been deleted.
 106          * LSQ entries can be squashed before the response comes back; in
 107          * that case the sender state remembers it through this flag.
 108          */
 109         bool deleted;
     
             /** Context id of the instruction stored in inst. */
 110         ContextID contextId() { return inst->contextId(); }
 111
 112         /** True once no packet is outstanding any more. */
 113         bool isComplete() { return outstanding == 0; }
             /** Marks the owning request as deleted. */
 114         void deleteRequest() { deleted = true; }
             /** True as long as deleteRequest() has not been called. */
 115         bool alive() { return !deleted; }
             /** The owning LSQRequest handed to the constructor. */
 116         LSQRequest* request() { return _request; }
             /** Completion hook that concrete sender states must implement. */
 117         virtual void complete() = 0;
             /** Forwards the writeback-done notification to the request. */
 118         void writebackDone() { _request->writebackDone(); }
 119     };
 120
 121     /**
 122      * Port used by the load/store queue to talk to the data cache.
 123      */
 124     class DcachePort : public MasterPort
 125     {
 126       protected:
 127
 128         /** The LSQ this port was created for. */
 129         LSQ<Impl> *lsq;
             /** The CPU handed to the constructor. */
 130         FullO3CPU<Impl> *cpu;
 131
 132       public:
 133         /** Creates the port, named "<cpu name>.dcache_port". */
 134         DcachePort(LSQ<Impl> *_lsq, FullO3CPU<Impl>* _cpu)
 135             : MasterPort(_cpu->name() + ".dcache_port", _cpu),
 136               lsq(_lsq), cpu(_cpu)
 137         {}
 138
 139       protected:
 140
 141         /** Timing-mode response handler. Takes care of writing back and
 142          * completing the load or store whose response has come back
 143          * from memory. */
 144         virtual bool recvTimingResp(PacketPtr pkt);
             /** Timing-mode snoop request handler. */
 145         virtual void recvTimingSnoopReq(PacketPtr pkt);
 146
             /** Functional snoops are currently ignored. */
 147         virtual void
             recvFunctionalSnoop(PacketPtr pkt)
 148         {
 149             // @todo: Is there a need for potential invalidation here?
 150         }
 151
 152         /** Retries the send that previously failed. */
 153         virtual void recvReqRetry();
 154
 155         /**
 156          * This CPU has to snoop in order to maintain the load store queue,
 157          * so the behaviour of the base CPU port is changed here.
 158          *
 159          * @return always true, because snooping is required
 160          */
 161         virtual bool
             isSnooping() const
             {
                 return true;
             }
 162     };
 163
 164     /** Memory operation metadata.
 165      * This class holds the information about a memory operation. It lives
 166      * from initiateAcc to resource deallocation at commit or squash.
 167      * LSQRequest objects are owned by the LQ/SQ Entry in the LSQUnit that
 168      * holds the operation. It is also used by the LSQSenderState. In addition,
 169      * the LSQRequest is a TranslationState, therefore, upon squash, there must
 170      * be a defined ownership transferal in case the LSQ resources are
 171      * deallocated before the TLB is done using the TranslationState. If that
 172      * happens, the LSQRequest will be self-owned, and responsible to detect
 173      * that its services are no longer required and self-destruct.
 174      *
 175      * Lifetime of a LSQRequest:
 176      *                 +--------------------+
 177      *                 |LSQ creates and owns|
 178      *                 +--------------------+
 179      *                           |
 180      *                 +--------------------+
 181      *                 |Initiate translation|
 182      *                 +--------------------+
 183      *                           |
 184      *                        ___^___
 185      *                    ___/       \___
 186      *             ______/   Squashed?   \
 187      *            |      \___         ___/
 188      *            |          \___ ___/
 189      *            |              v
 190      *            |              |
 191      *            |    +--------------------+
 192      *            |    |  Translation done  |
 193      *            |    +--------------------+
 194      *            |              |
 195      *            |    +--------------------+
 196      *            |    |     Send packet    |<------+
 197      *            |    +--------------------+       |
 198      *            |              |                  |
 199      *            |           ___^___               |
 200      *            |       ___/       \___           |
 201      *            |  ____/   Squashed?   \          |
 202      *            | |    \___         ___/          |
 203      *            | |        \___ ___/              |
 204      *            | |            v                  |
 205      *            | |            |                  |
 206      *            | |         ___^___               |
 207      *            | |     ___/       \___           |
 208      *            | |    /     Done?     \__________|
 209      *            | |    \___         ___/
 210      *            | |        \___ ___/
 211      *            | |            v
 212      *            | |            |
 213      *            | |  +--------------------+
 214      *            | |  |    Manage stuff    |
 215      *            | |  |   Free resources   |
 216      *            | |  +--------------------+
 217      *            | |
 218      *            | |  +--------------------+
 219      *            | |  |  senderState owns  |
 220      *            | +->|  onRecvTimingResp  |
 221      *            |    |   free resources   |
 222      *            |    +--------------------+
 223      *            |
 224      *            |   +----------------------+
 225      *            |   |  self owned (Trans)  |
 226      *            +-->| on TranslationFinish |
 227      *                |    free resources    |
 228      *                +----------------------+
 229      *
 230      *
 231      */
 232     class LSQRequest : public BaseTLB::Translation
 233     {
 234       protected:
 235         typedef uint32_t FlagsStorage;
 236         typedef ::Flags<FlagsStorage> FlagsType;
 237
             /** Bit positions stored in the flags member. */
 238         enum Flag : FlagsStorage
 239         {
 240             IsLoad              = 0x00000001,
 241             /** True if this is a store/atomic that writes registers (SC). */
 242             WbStore             = 0x00000002,
 243             Delayed             = 0x00000004,
 244             IsSplit             = 0x00000008,
 245             /** True if any translation has been sent to TLB. */
 246             TranslationStarted  = 0x00000010,
 247             /** Tested (negated) by isTranslationBlocked(); never set inside
                  * this class body. NOTE(review): presumably set once every
                  * outbound translation has been replied to -- confirm. */
 248             TranslationFinished = 0x00000020,
 249             Sent                = 0x00000040,
 250             Retry               = 0x00000080,
 251             Complete            = 0x00000100,
 252             /** Ownership tracking flags. */
 253             /** Translation squashed. */
 254             TranslationSquashed = 0x00000200,
 255             /** Request discarded */
 256             Discarded           = 0x00000400,
 257             /** LSQ resources freed. */
 258             LSQEntryFreed       = 0x00000800,
 259             /** Store written back. */
 260             WritebackScheduled  = 0x00001000,
 261             WritebackDone       = 0x00002000,
 262             /** True if this is an atomic request */
 263             IsAtomic            = 0x00004000
 264         };
 265         FlagsType flags;
 266
 267         enum class State
 268         {
 269             NotIssued,
 270             Translation,
 271             Request,
 272             Fault,
 273             PartialFault,
 274         };
 275         State _state;
             /** Sender state attached to the packets; deleted by the
              * destructor. */
 276         LSQSenderState* _senderState;
 277         void setState(const State& newState) { _state = newState; }
 278
 279         uint32_t numTranslatedFragments;
             /** Fragments whose translation is still in flight; a non-zero
              * value makes isAnyOutstandingRequest() return true. */
 280         uint32_t numInTranslationFragments;
 281
 282         /** LQ/SQ entry idx.
              * NOTE(review): not initialised by the constructors below. */
 283         uint32_t _entryIdx;
 284
 285         void markDelayed() override { flags.set(Flag::Delayed); }
 286         bool isDelayed() { return flags.isSet(Flag::Delayed); }
 287
 288       public:
             /** The LSQUnit passed to the constructor (see install()). */
 289         LSQUnit& _port;
             /** The instruction this request was created for. */
 290         const DynInstPtr _inst;
             /** NOTE(review): not initialised by the constructors; only
              * written by taskId(const uint32_t&). */
 291         uint32_t _taskId;
 292         PacketDataPtr _data;
             /** Packets held by this request; deleted by the destructor. */
 293         std::vector<PacketPtr> _packets;
             /** (Sub)requests created through addRequest(). */
 294         std::vector<RequestPtr> _requests;
 295         std::vector<Fault> _fault;
 296         uint64_t* _res;
 297         const Addr _addr;
 298         const uint32_t _size;
 299         const Request::Flags _flags;
 300         std::vector<bool> _byteEnable;
             /** Packets still awaiting a reply (see packetReplied()). */
 301         uint32_t _numOutstandingPackets;
 302         AtomicOpFunctorPtr _amo_op;
 303       protected:
 304         LSQUnit* lsqUnit() { return &_port; }
     
             /** Builds a request without address information.
              * _addr, _size and _flags are zero, _data and _res are null.
              * Both fragment counters start at zero, because
              * isAnyOutstandingRequest() and squashTranslation() read
              * numInTranslationFragments. The request installs itself in
              * the LQ/SQ entry of the instruction.
              */
 305         LSQRequest(LSQUnit* port, const DynInstPtr& inst, bool isLoad) :
 306             _state(State::NotIssued), _senderState(nullptr),
                 numTranslatedFragments(0), numInTranslationFragments(0),
 307             _port(*port), _inst(inst), _data(nullptr),
 308             _res(nullptr), _addr(0), _size(0), _flags(0),
 309             _numOutstandingPackets(0), _amo_op(nullptr)
 310         {
 311             flags.set(Flag::IsLoad, isLoad);
 312             flags.set(Flag::WbStore,
 313                       _inst->isStoreConditional() || _inst->isAtomic());
 314             flags.set(Flag::IsAtomic, _inst->isAtomic());
 315             install();
 316         }
     
             /** Builds a request for the access [addr, addr + size).
              * Stores the arguments, takes ownership of amo_op, derives the
              * IsLoad/WbStore/IsAtomic flags and installs the request in
              * the LQ/SQ entry of the instruction.
              */
 317         LSQRequest(LSQUnit* port, const DynInstPtr& inst, bool isLoad,
 318                    const Addr& addr, const uint32_t& size,
 319                    const Request::Flags& flags_,
 320                    PacketDataPtr data = nullptr, uint64_t* res = nullptr,
 321                    AtomicOpFunctorPtr amo_op = nullptr)
 322             : _state(State::NotIssued), _senderState(nullptr),
 323             numTranslatedFragments(0),
 324             numInTranslationFragments(0),
 325             _port(*port), _inst(inst), _data(data),
 326             _res(res), _addr(addr), _size(size),
 327             _flags(flags_),
 328             _numOutstandingPackets(0),
 329             _amo_op(std::move(amo_op))
 330         {
 331             flags.set(Flag::IsLoad, isLoad);
 332             flags.set(Flag::WbStore,
 333                       _inst->isStoreConditional() || _inst->isAtomic());
 334             flags.set(Flag::IsAtomic, _inst->isAtomic());
 335             install();
 336         }
 337
             /** True if the IsLoad flag is set. */
 338         bool
 339         isLoad() const
 340         {
 341             return flags.isSet(Flag::IsLoad);
 342         }
 343
             /** True if the IsAtomic flag is set. */
 344         bool
 345         isAtomic() const
 346         {
 347             return flags.isSet(Flag::IsAtomic);
 348         }
 349
 350         /** Install the request in the LQ/SQ. */
 351         void install()
 352         {
 353             if (isLoad()) {
 354                 _port.loadQueue[_inst->lqIdx].setRequest(this);
 355             } else {
 356                 // Store, StoreConditional, and Atomic requests are pushed
 357                 // to this storeQueue
 358                 _port.storeQueue[_inst->sqIdx].setRequest(this);
 359             }
 360         }
     
             /** Reports whether the owning instruction has been squashed. */
 361         virtual bool
 362         squashed() const override
 363         {
 364             return _inst->isSquashed();
 365         }
 366
 367         /**
 368          * Test if the LSQRequest has been released, i.e. self-owned.
 369          * An LSQRequest manages itself when the resources on the LSQ are freed
 370          * but the translation is still going on and the LSQEntry was freed.
 371          */
 372         bool
 373         isReleased()
 374         {
 375             return flags.isSet(Flag::LSQEntryFreed) ||
 376                 flags.isSet(Flag::Discarded);
 377         }
 378
 379         /** Release the LSQRequest.
 380          * Notify the sender state that the request it points to is not valid
 381          * anymore. Understand if the request is orphan (self-managed) and if
 382          * so, mark it as freed, else destroy it, as this means
 383          * the end of its life cycle.
 384          * An LSQRequest is orphan when its resources are released
 385          * but there is any in-flight translation request to the TLB or access
 386          * request to the memory.
              *
              * @param reason Either Flag::LSQEntryFreed or Flag::Discarded.
 387          */
 388         void release(Flag reason)
 389         {
 390             assert(reason == Flag::LSQEntryFreed || reason == Flag::Discarded);
 391             if (!isAnyOutstandingRequest()) {
                     // Nothing in flight: the object dies here and must not
                     // be touched afterwards.
 392                 delete this;
 393             } else {
 394                 if (_senderState) {
 395                     _senderState->deleteRequest();
 396                 }
 397                 flags.set(reason);
 398             }
 399         }
 400
 401         /** Helper function used to add a (sub)request, given its address
 402          * `addr`, size `size` and byte-enable mask `byteEnable`.
 403          *
 404          * The request is only added if the mask is empty or if there is at
 405          * least an active element in it.
 406          */
 407         void
 408         addRequest(Addr addr, unsigned size,
 409                    const std::vector<bool>& byteEnable)
 410         {
 411             if (byteEnable.empty() ||
 412                 isAnyActiveElement(byteEnable.begin(), byteEnable.end())) {
                     // _amo_op is moved into the new Request, so it is empty
                     // after the first request has been added.
 413                 auto request = std::make_shared<Request>(_inst->getASID(),
 414                         addr, size, _flags, _inst->masterId(),
 415                         _inst->instAddr(), _inst->contextId(),
 416                         std::move(_amo_op));
 417                 if (!byteEnable.empty()) {
 418                     request->setByteEnable(byteEnable);
 419                 }
 420                 _requests.push_back(request);
 421             }
 422         }
 423
 424         /** Destructor.
 425          * Must only run once nothing is outstanding (asserted). Clears the
 426          * instruction's savedReq pointer and deletes the sender state (if
 427          * any) together with every packet held in _packets.
              */
 428         virtual ~LSQRequest()
 429         {
 430             assert(!isAnyOutstandingRequest());
 431             _inst->savedReq = nullptr;
 432             // Deleting a null pointer is a no-op, so no guard is needed.
 433             delete _senderState;
 434
 435             for (auto pkt : _packets)
 436                 delete pkt;
 437         }
 438
 439
 440       public:
 441         /** Convenience getters/setters. */
 442         /** @{ */
 443         /** Set up Context numbers. */
 444         void
 445         setContext(const ContextID& context_id)
 446         {
 447             request()->setContext(context_id);
 448         }
 449
             /** The instruction this request belongs to. */
 450         const DynInstPtr&
 451         instruction()
 452         {
 453             return _inst;
 454         }
 455
 456         /** Set up virtual request.
 457          * For a previously allocated Request objects.
 458          */
 459         void
 460         setVirt(int asid, Addr vaddr, unsigned size, Request::Flags flags_,
 461                 MasterID mid, Addr pc)
 462         {
 463             request()->setVirt(asid, vaddr, size, flags_, mid, pc);
 464         }
 465
             /** Stores the task id and propagates it to every (sub)request. */
 466         void
 467         taskId(const uint32_t& v)
 468         {
 469             _taskId = v;
 470             for (auto& r: _requests)
 471                 r->taskId(v);
 472         }
 473
 474         uint32_t taskId() const { return _taskId; }
             /** (Sub)request idx; bounds-checked through vector::at(). */
 475         RequestPtr request(int idx = 0) { return _requests.at(idx); }
 476
 477         const RequestPtr
 478         request(int idx = 0) const
 479         {
 480             return _requests.at(idx);
 481         }
 482
 483         Addr getVaddr(int idx = 0) const { return request(idx)->getVaddr(); }
 484         virtual void initiateTranslation() = 0;
 485
             /** Packet idx; bounds-checked through vector::at(). */
 486         PacketPtr packet(int idx = 0) { return _packets.at(idx); }
 487
             /** The only packet; asserts that exactly one exists. */
 488         virtual PacketPtr
 489         mainPacket()
 490         {
 491             assert (_packets.size() == 1);
 492             return packet();
 493         }
 494
             /** The only request; asserts that exactly one exists. */
 495         virtual RequestPtr
 496         mainRequest()
 497         {
 498             assert (_requests.size() == 1);
 499             return request();
 500         }
 501
             /** Records st and attaches it to every non-null packet. */
 502         void
 503         senderState(LSQSenderState* st)
 504         {
 505             _senderState = st;
 506             for (auto& pkt: _packets) {
 507                 if (pkt)
 508                     pkt->senderState = st;
 509             }
 510         }
 511
 512         const LSQSenderState*
 513         senderState() const
 514         {
 515             return _senderState;
 516         }
 517
 518         /**
 519          * Mark senderState as discarded. This will cause to discard response
 520          * packets from the cache.
 521          */
 522         void
 523         discardSenderState()
 524         {
 525             assert(_senderState);
 526             _senderState->deleteRequest();
 527         }
 528
 529         /**
 530          * Test if there is any in-flight translation or mem access request
              * (a writeback that is scheduled but not yet done also counts).
 531          */
 532         bool
 533         isAnyOutstandingRequest()
 534         {
 535             return numInTranslationFragments > 0 ||
 536                 _numOutstandingPackets > 0 ||
 537                 (flags.isSet(Flag::WritebackScheduled) &&
 538                  !flags.isSet(Flag::WritebackDone));
 539         }
 540
 541         bool
 542         isSplit() const
 543         {
 544             return flags.isSet(Flag::IsSplit);
 545         }
 546         /** @} */
 547         virtual bool recvTimingResp(PacketPtr pkt) = 0;
 548         virtual void sendPacketToCache() = 0;
 549         virtual void buildPackets() = 0;
 550
 551         /**
 552          * Memory mapped IPR accesses
 553          */
 554         virtual void handleIprWrite(ThreadContext *thread, PacketPtr pkt) = 0;
 555         virtual Cycles handleIprRead(ThreadContext *thread, PacketPtr pkt) = 0;
 556
 557         /**
 558          * Test if the request accesses a particular cache line.
 559          */
 560         virtual bool isCacheBlockHit(Addr blockAddr, Addr cacheBlockMask) = 0;
 561
 562         /** Update the status to reflect that a packet was sent. */
 563         void
 564         packetSent()
 565         {
 566             flags.set(Flag::Sent);
 567         }
 568         /** Update the status to reflect that a packet was not sent.
 569          * When a packet fails to be sent, we mark the request as needing a
 570          * retry. Note that Retry flag is sticky.
 571          */
 572         void
 573         packetNotSent()
 574         {
 575             flags.set(Flag::Retry);
 576             flags.clear(Flag::Sent);
 577         }
 578
 579         void sendFragmentToTranslation(int i);
 580         bool
 581         isComplete()
 582         {
 583             return flags.isSet(Flag::Complete);
 584         }
 585
 586         bool
 587         isInTranslation()
 588         {
 589             return _state == State::Translation;
 590         }
 591
             /** Translation was started and the state has left Translation. */
 592         bool
 593         isTranslationComplete()
 594         {
 595             return flags.isSet(Flag::TranslationStarted) &&
 596                    !isInTranslation();
 597         }
 598
             /** In Translation state, started, but not flagged as finished. */
 599         bool
 600         isTranslationBlocked()
 601         {
 602             return _state == State::Translation &&
 603                 flags.isSet(Flag::TranslationStarted) &&
 604                 !flags.isSet(Flag::TranslationFinished);
 605         }
 606
 607         bool
 608         isSent()
 609         {
 610             return flags.isSet(Flag::Sent);
 611         }
 612
 613         bool
 614         isPartialFault()
 615         {
 616             return _state == State::PartialFault;
 617         }
 618
             /** True in Request state, or for a load with a partial fault. */
 619         bool
 620         isMemAccessRequired()
 621         {
 622             return (_state == State::Request ||
 623                     (isPartialFault() && isLoad()));
 624         }
 625
 626         /**
 627          * The LSQ entry is cleared
 628          */
 629         void
 630         freeLSQEntry()
 631         {
 632             release(Flag::LSQEntryFreed);
 633         }
 634
 635         /**
 636          * The request is discarded (e.g. partial store-load forwarding)
 637          */
 638         void
 639         discard()
 640         {
 641             release(Flag::Discarded);
 642         }
 643
             /** Accounts for one reply; self-destructs when it was the last
              * outstanding packet of an already released request. */
 644         void
 645         packetReplied()
 646         {
 647             assert(_numOutstandingPackets > 0);
 648             _numOutstandingPackets--;
 649             if (_numOutstandingPackets == 0 && isReleased())
 650                 delete this;
 651         }
 652
             /** Flags the writeback as scheduled; may be called only once. */
 653         void
 654         writebackScheduled()
 655         {
 656             assert(!flags.isSet(Flag::WritebackScheduled));
 657             flags.set(Flag::WritebackScheduled);
 658         }
 659
             /** Flags the writeback as done; self-destructs if released. */
 660         void
 661         writebackDone()
 662         {
 663             flags.set(Flag::WritebackDone);
 664             /* If the lsq resources are already free */
 665             if (isReleased()) {
 666                 delete this;
 667             }
 668         }
 669
             /** Flags the translation as squashed; requires that no fragment
              * is still in translation (asserted). */
 670         void
 671         squashTranslation()
 672         {
 673             assert(numInTranslationFragments == 0);
 674             flags.set(Flag::TranslationSquashed);
 675             /* If we are on our own, self-destruct. */
 676             if (isReleased()) {
 677                 delete this;
 678             }
 679         }
 680
             /** Sets the Complete flag. */
 681         void
 682         complete()
 683         {
 684             flags.set(Flag::Complete);
 685         }
 686     };
 687
 688     class SingleDataRequest : public LSQRequest
 689     {
 690       protected:
 691         /* Because this code lives inside a template, the names taken from
 692          * the parent class have to be declared explicitly here. */
 693         using Flag = typename LSQRequest::Flag;
 694         using State = typename LSQRequest::State;
 695         using LSQRequest::_addr;
 696         using LSQRequest::_amo_op;
 697         using LSQRequest::_byteEnable;
 698         using LSQRequest::_fault;
 699         using LSQRequest::_flags;
 700         using LSQRequest::_inst;
 701         using LSQRequest::_numOutstandingPackets;
 702         using LSQRequest::_packets;
 703         using LSQRequest::_port;
 704         using LSQRequest::_requests;
 705         using LSQRequest::_res;
 706         using LSQRequest::_senderState;
 707         using LSQRequest::_size;
 708         using LSQRequest::_state;
 709         using LSQRequest::_taskId;
 710         using LSQRequest::flags;
 711         using LSQRequest::isLoad;
 712         using LSQRequest::isTranslationComplete;
 713         using LSQRequest::lsqUnit;
 714         using LSQRequest::numInTranslationFragments;
 715         using LSQRequest::numTranslatedFragments;
 716         using LSQRequest::request;
 717         using LSQRequest::sendFragmentToTranslation;
 718         using LSQRequest::setState;
 719       public:
             /** Passes every argument straight on to LSQRequest. */
 720         SingleDataRequest(LSQUnit* port, const DynInstPtr& inst, bool isLoad,
 721                           const Addr& addr, const uint32_t& size,
 722                           const Request::Flags& flags_,
 723                           PacketDataPtr data = nullptr,
 724                           uint64_t* res = nullptr,
 725                           AtomicOpFunctorPtr amo_op = nullptr)
 726             : LSQRequest(port, inst, isLoad, addr, size, flags_, data, res,
 727                          std::move(amo_op))
             {}
 728
 729         virtual ~SingleDataRequest() {}
     
             /* Implementations of the LSQRequest interface. */
 730         virtual void initiateTranslation();
 731         virtual void finish(const Fault &fault, const RequestPtr &req,
 732                 ThreadContext* tc, BaseTLB::Mode mode);
 733         virtual bool recvTimingResp(PacketPtr pkt);
 734         virtual void sendPacketToCache();
 735         virtual void buildPackets();
 736         virtual void handleIprWrite(ThreadContext *thread, PacketPtr pkt);
 737         virtual Cycles handleIprRead(ThreadContext *thread, PacketPtr pkt);
 738         virtual bool isCacheBlockHit(Addr blockAddr, Addr cacheBlockMask);
 739     };
 740
 741     class SplitDataRequest : public LSQRequest
 742     {
 743       protected:
 744         /* Because this code lives inside a template, the names taken from
 745          * the parent class have to be declared explicitly here. */
 746         using Flag = typename LSQRequest::Flag;
 747         using State = typename LSQRequest::State;
 748         using LSQRequest::_addr;
 749         using LSQRequest::_byteEnable;
 750         using LSQRequest::_data;
 751         using LSQRequest::_fault;
 752         using LSQRequest::_flags;
 753         using LSQRequest::_inst;
 754         using LSQRequest::_numOutstandingPackets;
 755         using LSQRequest::_packets;
 756         using LSQRequest::_port;
 757         using LSQRequest::_requests;
 758         using LSQRequest::_res;
 759         using LSQRequest::_senderState;
 760         using LSQRequest::_size;
 761         using LSQRequest::_state;
 762         using LSQRequest::_taskId;
 763         using LSQRequest::flags;
 764         using LSQRequest::isLoad;
 765         using LSQRequest::isTranslationComplete;
 766         using LSQRequest::lsqUnit;
 767         using LSQRequest::numInTranslationFragments;
 768         using LSQRequest::numTranslatedFragments;
 769         using LSQRequest::request;
 770         using LSQRequest::sendFragmentToTranslation;
 771         using LSQRequest::setState;
 772
 773         uint32_t numFragments;
 774         uint32_t numReceivedPackets;
 775         RequestPtr mainReq;
             /** Deleted by the destructor. */
 776         PacketPtr _mainPacket;
 777
 778       public:
             /** Forwards to LSQRequest with a null atomic-op functor, zeroes
              * the counters and sets the IsSplit flag. */
 779         SplitDataRequest(LSQUnit* port, const DynInstPtr& inst, bool isLoad,
 780                          const Addr& addr, const uint32_t& size,
 781                          const Request::Flags & flags_,
 782                          PacketDataPtr data = nullptr,
 783                          uint64_t* res = nullptr)
 784             : LSQRequest(port, inst, isLoad, addr, size, flags_, data, res,
 785                          nullptr),
 786               numFragments(0),
 787               numReceivedPackets(0),
 788               mainReq(nullptr),
 789               _mainPacket(nullptr)
 790         {
 791             flags.set(Flag::IsSplit);
 792         }
     
             /** Drops the main request and deletes the main packet. */
 793         virtual ~SplitDataRequest()
 794         {
 795             // Resetting an empty pointer and deleting a null pointer are
 796             // both no-ops, so neither step needs a guard.
 797             mainReq = nullptr;
 799             delete _mainPacket;
 800             _mainPacket = nullptr;
 802         }
     
             /* Implementations of the LSQRequest interface. */
 803         virtual void finish(const Fault &fault, const RequestPtr &req,
 804                 ThreadContext* tc, BaseTLB::Mode mode);
 805         virtual bool recvTimingResp(PacketPtr pkt);
 806         virtual void initiateTranslation();
 807         virtual void sendPacketToCache();
 808         virtual void buildPackets();
 809
 810         virtual void handleIprWrite(ThreadContext *thread, PacketPtr pkt);
 811         virtual Cycles handleIprRead(ThreadContext *thread, PacketPtr pkt);
 812         virtual bool isCacheBlockHit(Addr blockAddr, Addr cacheBlockMask);
 813
             /* Replace the single-request/single-packet defaults. */
 814         virtual RequestPtr mainRequest();
 815         virtual PacketPtr mainPacket();
 816     };
 817
 818     /**
          * Constructs an LSQ with the given parameters.
          *
          * @param cpu_ptr CPU pointer; presumably kept in the cpu member --
          *                TODO confirm in the out-of-line definition.
          * @param iew_ptr IEW stage pointer; presumably kept in iewStage --
          *                TODO confirm.
          * @param params  CPU parameter object handed to the LSQ.
          */
 819     LSQ(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params);
         /** Destructor with an empty body. */
 820     ~LSQ() { }
 821
 822     /** Returns the name of the LSQ. */
 823     std::string name() const;
 824
 825     /** Registers the statistics of each LSQ unit. */
 826     void regStats();
 827
 828     /**
          * Sets the pointer to the list of active threads.
          *
          * @param at_ptr Pointer to the list of active thread IDs.
          */
 829     void setActiveThreads(std::list<ThreadID> *at_ptr);
 830
 831     /** Performs sanity checks after a drain. */
 832     void drainSanityCheck() const;
 833     /** Returns whether the LSQ has drained. */
 834     bool isDrained() const;
 835     /** Takes over execution from another CPU's thread. */
 836     void takeOverFrom();
 837
 838     /**
          * Returns the number of entries needed for the given number of
          * threads.
          *
          * @param num_threads Thread count the entry amount is computed for.
          */
 839     int entryAmount(ThreadID num_threads);
 840
 841     /** Ticks the LSQ. */
 842     void tick();
 843
 844     /**
          * Inserts a load into the LSQ.
          *
          * @param load_inst The load instruction to insert.
          */
 845     void insertLoad(const DynInstPtr &load_inst);
 846     /**
          * Inserts a store into the LSQ.
          *
          * @param store_inst The store instruction to insert.
          */
 847     void insertStore(const DynInstPtr &store_inst);
 848
 849     /**
          * Executes a load.
          *
          * @param inst The load instruction to execute.
          * @return The Fault produced by the out-of-line implementation.
          */
 850     Fault executeLoad(const DynInstPtr &inst);
 851
 852     /**
          * Executes a store.
          *
          * @param inst The store instruction to execute.
          * @return The Fault produced by the out-of-line implementation.
          */
 853     Fault executeStore(const DynInstPtr &inst);
 854
 855     /**
 856      * Forwards the commit of loads, up until the given sequence number, to
          * the LSQ unit of thread @p tid (looked up with the bounds-checked
          * vector at()).
 857      */
 858     void
         commitLoads(InstSeqNum &youngest_inst, ThreadID tid)
 859     {
             LSQUnit &unit = thread.at(tid);
             unit.commitLoads(youngest_inst);
         }
 860
 861     /**
 862      * Forwards the commit of stores, up until the given sequence number, to
          * the LSQ unit of thread @p tid (looked up with the bounds-checked
          * vector at()).
 863      */
 864     void
         commitStores(InstSeqNum &youngest_inst, ThreadID tid)
 865     {
             LSQUnit &unit = thread.at(tid);
             unit.commitStores(youngest_inst);
         }
 866
 867     /**
 868      * Attempts to write back stores until all cache ports are used or the
 869      * interface becomes blocked.
 870      */
 871     void writebackStores();
 872     /**
          * Thread-specific overload of writebackStores(): same purpose, but
          * restricted to the thread identified by @p tid.
          */
 873     void writebackStores(ThreadID tid);
 874
 875     /**
 876      * Asks the LSQ unit of thread @p tid to squash its instructions until
          * the specified sequence number.
 877      */
 878     void
 879     squash(const InstSeqNum &squashed_num, ThreadID tid)
 880     {
             LSQUnit &unit = thread.at(tid);
 881         unit.squash(squashed_num);
 882     }
 883
 884     /** Reports whether a memory ordering violation occurred (any thread). */
 885     bool violation();
 886     /**
 887      * Reports whether the LSQ unit of thread @p tid saw a memory ordering
 888      * violation.
 889      */
 890     bool
         violation(ThreadID tid)
         {
             LSQUnit &unit = thread.at(tid);
             return unit.violation();
         }
 891
 892     /**
          * Fetches, from the LSQ unit of thread @p tid, the instruction that
          * caused the memory ordering violation.
          */
 893     DynInstPtr
 894     getMemDepViolator(ThreadID tid)
 895     {
             LSQUnit &unit = thread.at(tid);
 896         return unit.getMemDepViolator();
 897     }
 898
 899     /** Load queue head index of the LSQ unit belonging to thread @p tid. */
 900     int
         getLoadHead(ThreadID tid)
         {
             LSQUnit &unit = thread.at(tid);
             return unit.getLoadHead();
         }
 901
 902     /**
          * Sequence number of the load queue head, taken from the LSQ unit of
          * thread @p tid.
          */
 903     InstSeqNum
 904     getLoadHeadSeqNum(ThreadID tid)
 905     {
             LSQUnit &unit = thread.at(tid);
 906         return unit.getLoadHeadSeqNum();
 907     }
 908
 909     /** Store queue head index of the LSQ unit belonging to thread @p tid. */
 910     int
         getStoreHead(ThreadID tid)
         {
             LSQUnit &unit = thread.at(tid);
             return unit.getStoreHead();
         }
 911
 912     /**
          * Sequence number of the store queue head, taken from the LSQ unit of
          * thread @p tid.
          */
 913     InstSeqNum
 914     getStoreHeadSeqNum(ThreadID tid)
 915     {
             LSQUnit &unit = thread.at(tid);
 916         return unit.getStoreHeadSeqNum();
 917     }
 918
 919     /** Instruction count over all of the queues. */
 920     int getCount();
 921     /** Instruction count in the queues of the LSQ unit of thread @p tid. */
 922     int
         getCount(ThreadID tid)
         {
             LSQUnit &unit = thread.at(tid);
             return unit.getCount();
         }
 923
 924     /** Total number of loads in the load queue. */
 925     int numLoads();
 926     /** Number of loads held by the LSQ unit of thread @p tid. */
 927     int
         numLoads(ThreadID tid)
         {
             LSQUnit &unit = thread.at(tid);
             return unit.numLoads();
         }
 928
 929     /** Total number of stores in the store queue. */
 930     int numStores();
 931     /** Number of stores held by the LSQ unit of thread @p tid. */
 932     int
         numStores(ThreadID tid)
         {
             LSQUnit &unit = thread.at(tid);
             return unit.numStores();
         }
 933
 934     /**
          * Returns the number of free load entries.
          * NOTE(review): presumably aggregated over all threads, in contrast
          * to the ThreadID overload -- confirm in the definition.
          */
 935     unsigned numFreeLoadEntries();
 936
 937     /**
          * Returns the number of free store entries.
          * NOTE(review): presumably aggregated over all threads, in contrast
          * to the ThreadID overload -- confirm in the definition.
          */
 938     unsigned numFreeStoreEntries();
 939
 940     /** Returns the number of free entries for a specific thread. */
 941     unsigned numFreeEntries(ThreadID tid);
 942
 943     /**
          * Returns the number of free entries in the load queue (LQ) for a
          * specific thread.
          */
 944     unsigned numFreeLoadEntries(ThreadID tid);
 945
 946     /**
          * Returns the number of free entries in the store queue (SQ) for a
          * specific thread.
          */
 947     unsigned numFreeStoreEntries(ThreadID tid);
 948
 949     /** Returns whether the LSQ is full (either LQ or SQ is full). */
 950     bool isFull();
 951     /**
 952      * Returns whether the LSQ is full for a specific thread (either its LQ
 953      * or its SQ is full).
 954      */
 955     bool isFull(ThreadID tid);
 956
 957     /** Returns whether the LSQ is empty (both LQ and SQ are empty). */
 958     bool isEmpty() const;
 959     /** Returns whether all of the LQs are empty. */
 960     bool lqEmpty() const;
 961     /** Returns whether all of the SQs are empty. */
 962     bool sqEmpty() const;
 963
 964     /** Returns whether any of the LQs is full. */
 965     bool lqFull();
 966     /** Returns whether the LQ of a given thread is full. */
 967     bool lqFull(ThreadID tid);
 968
 969     /** Returns whether any of the SQs is full. */
 970     bool sqFull();
 971     /** Returns whether the SQ of a given thread is full. */
 972     bool sqFull(ThreadID tid);
 973
 974     /**
 975      * Returns whether the LSQ is stalled due to a memory operation that
 976      * must be replayed.
 977      */
 978     bool isStalled();
 979     /**
 980      * Returns whether the LSQ of a specific thread is stalled due to a
 981      * memory operation that must be replayed.
 982      */
 983     bool isStalled(ThreadID tid);
 984
 985     /** Reports whether any stores are waiting to be written back to memory. */
 986     bool hasStoresToWB();
 987
 988     /**
 989      * Reports whether the LSQ unit of thread @p tid has any stores to write
 990      * back to memory.
          */
 991     bool
         hasStoresToWB(ThreadID tid)
         {
             LSQUnit &unit = thread.at(tid);
             return unit.hasStoresToWB();
         }
 992
 993     /** Number of stores the LSQ unit of thread @p tid has to write back. */
 994     int
         numStoresToWB(ThreadID tid)
         {
             LSQUnit &unit = thread.at(tid);
             return unit.numStoresToWB();
         }
 995
 996     /** Reports whether the LSQ will write back to memory this cycle. */
 997     bool willWB();
 998     /**
 999      * Reports whether the LSQ unit of thread @p tid will write back to
1000      * memory this cycle.
          */
1001     bool
         willWB(ThreadID tid)
         {
             LSQUnit &unit = thread.at(tid);
             return unit.willWB();
         }
1002
1003     /** Debugging aid: prints out all instructions. */
1004     void dumpInsts() const;
1005     /** Debugging aid: prints the instructions of thread @p tid's LSQ unit. */
1006     void
         dumpInsts(ThreadID tid) const
         {
             const LSQUnit &unit = thread.at(tid);
             unit.dumpInsts();
         }
1007
1008     /** Executes a read operation, using the load specified at the load
1009      * index.
          *
          * The definition maps the context ID of req->request() to a thread
          * with cpu->contextToThread() and forwards to that thread's LSQ unit.
          *
          * @param req      Request describing the access.
          * @param load_idx Load index passed through to the LSQ unit.
          * @return The Fault returned by the LSQ unit's read().
1010      */
1011     Fault read(LSQRequest* req, int load_idx);
1012
1013     /** Executes a store operation, using the store specified at the store
1014      * index.
          *
          * The definition maps the context ID of req->request() to a thread
          * with cpu->contextToThread() and forwards to that thread's LSQ unit.
          *
          * @param req       Request describing the access.
          * @param data      Data pointer passed through to the LSQ unit.
          * @param store_idx Store index passed through to the LSQ unit.
          * @return The Fault returned by the LSQ unit's write().
1015      */
1016     Fault write(LSQRequest* req, uint8_t *data, int store_idx);
1017
1018     /**
1019      * Retries the previous send that failed.
1020      */
1021     void recvReqRetry();
1022
         /*
          * NOTE(review): undocumented in the original; judging by the name and
          * parameter it completes the data access carried by pkt -- confirm
          * against the definition.
          */
1023     void completeDataAccess(PacketPtr pkt);
1024     /**
1025      * Handles writing back and completing the load or store that has
1026      * returned from memory.
1027      *
1028      * @param pkt Response packet from the memory sub-system
1029      */
1030     bool recvTimingResp(PacketPtr pkt);
1031
         /*
          * NOTE(review): undocumented in the original; presumably the handler
          * for timing snoop requests arriving as pkt -- confirm.
          */
1032     void recvTimingSnoopReq(PacketPtr pkt);
1033
         /*
          * NOTE(review): undocumented in the original. Takes the instruction,
          * a load/store selector, a data pointer, size, address and request
          * flags, plus a result pointer, an atomic-op functor and a per-byte
          * enable mask; how they are used must be read from the definition.
          */
1034     Fault pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data,
1035                       unsigned int size, Addr addr, Request::Flags flags,
1036                       uint64_t *res, AtomicOpFunctorPtr amo_op,
1037                       const std::vector<bool>& byteEnable);
1038
1039     /** The CPU pointer (used by read()/write() for contextToThread()). */
1040     O3CPU *cpu;
1041
1042     /** The IEW stage pointer. */
1043     IEW *iewStage;
1044
1045     /** Returns whether the D-cache is blocked. */
1046     bool cacheBlocked() const;
1047     /** Sets the D-cache blocked status. */
1048     void cacheBlocked(bool v);
1049     /**
          * Returns whether a cache port is available to use.
          * NOTE(review): the original comment mentioned store ports only, but
          * the is_load parameter suggests load or store ports are selected by
          * it -- confirm against the definition.
          */
1050     bool cachePortAvailable(bool is_load) const;
1051     /**
          * Records that another cache port is in use.
          * NOTE(review): presumably a load port when is_load is true and a
          * store port otherwise -- confirm against the definition.
          */
1052     void cachePortBusy(bool is_load);
1053
1054     /** Gives callers a reference to the LSQ's data port member. */
         MasterPort &
         getDataPort()
         {
             return dcachePort;
         }
1055
1056   protected:
1057     /** Whether the D-cache is blocked. */
1058     bool _cacheBlocked;
1059     /** The number of cache ports available each cycle (stores only). */
1060     int cacheStorePorts;
1061     /** The number of cache ports used by stores in this cycle. */
1062     int usedStorePorts;
1063     /** The number of cache ports available each cycle (loads only). */
1064     int cacheLoadPorts;
1065     /** The number of cache ports used by loads in this cycle. */
1066     int usedLoadPorts;
1067
1068
1069     /** The LSQ sharing policy for SMT mode. */
1070     SMTQueuePolicy lsqPolicy;
1071
1072     /**
1073      * Computes the largest number of LSQ entries one thread may occupy
1074      * under the given SMT sharing policy.
1075      *
          * @param pol          Sharing policy to apply.
          * @param entries      Total number of entries in the queue.
          * @param numThreads   Divisor for the Partitioned policy; the division
          *                     is unguarded, so it must be non-zero there.
          * @param SMTThreshold Value returned for the Threshold policy.
          * @return entries (Dynamic), entries / numThreads using integer
          *         division (Partitioned), SMTThreshold (Threshold), or 0 for
          *         any other policy value.
1076      */
1077     static uint32_t
1078     maxLSQAllocation(SMTQueuePolicy pol, uint32_t entries,
1079             uint32_t numThreads, uint32_t SMTThreshold)
1080     {
1081         // Dynamic: a thread may claim every entry.
1082         if (pol == SMTQueuePolicy::Dynamic) {
1083             return entries;
1084         }
1085         // Partitioned: equal share per thread (the remainder is dropped).
1086         //@todo:make work if part_amt doesnt divide evenly.
1087         if (pol == SMTQueuePolicy::Partitioned) {
1088             return entries / numThreads;
1089         }
1090         // Threshold: the configured threshold is the cap.
1091         //@todo: Should threads check the max and the total
1092         //amount of the LSQ
             if (pol == SMTQueuePolicy::Threshold) {
                 return SMTThreshold;
             }
             // Any other policy value yields no allocation.
             return 0;
1093     }
1094
1095     /** Pointer to the list of active threads in the system. */
1096     std::list<ThreadID> *activeThreads;
1097
1098     /** Total number of LQ entries. */
1099     unsigned LQEntries;
1100     /** Total number of SQ entries. */
1101     unsigned SQEntries;
1102
1103     /** Max LQ size - used to enforce sharing policies. */
1104     unsigned maxLQEntries;
1105
1106     /** Max SQ size - used to enforce sharing policies. */
1107     unsigned maxSQEntries;
1108
1109     /** Data port (handed out by getDataPort()). */
1110     DcachePort dcachePort;
1111
1112     /** The LSQ units for individual threads, indexed by ThreadID via at(). */
1113     std::vector<LSQUnit> thread;
1114
1115     /** Number of threads. */
1116     ThreadID numThreads;
1117 };
1118
1119 template <class Impl>
1120 Fault
1121 LSQ<Impl>::read(LSQRequest* req, int load_idx)
1122 {
1123     // Translate the request's context ID into a thread ID.
         const auto context_id = req->request()->contextId();
         const ThreadID tid = cpu->contextToThread(context_id);
1124
1125     // Let that thread's LSQ unit (bounds-checked lookup) perform the read.
         auto &unit = thread.at(tid);
         return unit.read(req, load_idx);
1126 }
1127
1128 template <class Impl>
1129 Fault
1130 LSQ<Impl>::write(LSQRequest* req, uint8_t *data, int store_idx)
1131 {
1132     // Translate the request's context ID into a thread ID.
         const auto context_id = req->request()->contextId();
         const ThreadID tid = cpu->contextToThread(context_id);
1133
1134     // Let that thread's LSQ unit (bounds-checked lookup) perform the write.
         auto &unit = thread.at(tid);
         return unit.write(req, data, store_idx);
1135 }
1136
1137 #endif // __CPU_O3_LSQ_HH__