// Merge vm1.(none):/home/stever/bk/newmem
// [gem5.git] / src / cpu / ozone / lw_lsq.hh
1 /*
2 * Copyright (c) 2006 The Regents of The University of Michigan
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 *
28 * Authors: Kevin Lim
29 */
30
31 #ifndef __CPU_OZONE_LW_LSQ_HH__
32 #define __CPU_OZONE_LW_LSQ_HH__
33
34 #include <list>
35 #include <map>
36 #include <queue>
37 #include <algorithm>
38
39 #include "arch/faults.hh"
40 #include "arch/isa_traits.hh"
41 #include "config/full_system.hh"
42 #include "base/hashmap.hh"
43 #include "cpu/inst_seq.hh"
44 #include "mem/packet.hh"
45 #include "mem/port.hh"
46 //#include "mem/page_table.hh"
47 #include "sim/debug.hh"
48 #include "sim/sim_object.hh"
49
50 //class PageTable;
51
52 /**
53 * Class that implements the actual LQ and SQ for each specific thread.
54 * Both are circular queues; load entries are freed upon committing, while
55 * store entries are freed once they writeback. The LSQUnit tracks if there
56 * are memory ordering violations, and also detects partial load to store
57 * forwarding cases (a store only has part of a load's data) that requires
58 * the load to wait until the store writes back. In the former case it
59 * holds onto the instruction until the dependence unit looks at it, and
60 * in the latter it stalls the LSQ until the store writes back. At that
61 * point the load is replayed.
62 */
template <class Impl>
class OzoneLWLSQ {
  public:
    typedef typename Impl::Params Params;
    typedef typename Impl::FullCPU FullCPU;
    typedef typename Impl::BackEnd BackEnd;
    typedef typename Impl::DynInstPtr DynInstPtr;
    typedef typename Impl::IssueStruct IssueStruct;

    typedef TheISA::IntReg IntReg;

    typedef typename std::map<InstSeqNum, DynInstPtr>::iterator LdMapIt;

  private:
    /** Event processed when a store's memory access completes; it
     * notifies the back-end and the LSQ unit that issued the store.
     */
    class StoreCompletionEvent : public Event {
      public:
        /** Constructs a store completion event. */
        StoreCompletionEvent(DynInstPtr &inst, BackEnd *be,
                             Event *wb_event, OzoneLWLSQ *lsq_ptr);

        /** Processes the store completion event. */
        void process();

        /** Returns the description of this event. */
        const char *description();

      private:
        /** The store instruction being written back. */
        DynInstPtr inst;

        /** Pointer to the back-end stage. */
        BackEnd *be;
        /** The writeback event for the store. Needed for store
         * conditionals.
         */
      public:
        Event *wbEvent;
        /** Whether or not the store missed in the data cache. */
        bool miss;
      private:
        /** The pointer to the LSQ unit that issued the store. */
        OzoneLWLSQ<Impl> *lsqPtr;
    };

  public:
    /** Constructs an LSQ unit. init() must be called prior to use. */
    OzoneLWLSQ();

    /** Initializes the LSQ unit with the specified number of entries. */
    void init(Params *params, unsigned maxLQEntries,
              unsigned maxSQEntries, unsigned id);

    /** Returns the name of the LSQ unit. */
    std::string name() const;

    /** Sets the CPU pointer. */
    void setCPU(FullCPU *cpu_ptr)
    { cpu = cpu_ptr; }

    /** Sets the back-end stage pointer. */
    void setBE(BackEnd *be_ptr)
    { be = be_ptr; }

    /** Sets the page table pointer. */
    // void setPageTable(PageTable *pt_ptr);

    /** Ticks the LSQ unit, which in this case only resets the number of
     * used cache ports.
     * @todo: Move the number of used ports up to the LSQ level so it can
     * be shared by all LSQ units.
     */
    void tick() { usedPorts = 0; }

    /** Inserts an instruction. */
    void insert(DynInstPtr &inst);
    /** Inserts a load instruction. */
    void insertLoad(DynInstPtr &load_inst);
    /** Inserts a store instruction. */
    void insertStore(DynInstPtr &store_inst);

    /** Executes a load instruction. */
    Fault executeLoad(DynInstPtr &inst);

    /** Executes a store instruction. */
    Fault executeStore(DynInstPtr &inst);

    /** Commits the head load. */
    void commitLoad();
    /** Commits loads older than a specific sequence number. */
    void commitLoads(InstSeqNum &youngest_inst);

    /** Commits stores older than a specific sequence number. */
    void commitStores(InstSeqNum &youngest_inst);

    /** Writes back stores. */
    void writebackStores();

    // @todo: Include stats in the LSQ unit.
    //void regStats();

    /** Clears all the entries in the LQ. */
    void clearLQ();

    /** Clears all the entries in the SQ. */
    void clearSQ();

    /** Resizes the LQ to a given size. */
    void resizeLQ(unsigned size);

    /** Resizes the SQ to a given size. */
    void resizeSQ(unsigned size);

    /** Squashes all instructions younger than a specific sequence number. */
    void squash(const InstSeqNum &squashed_num);

    /** Returns if there is a memory ordering violation. Value is reset upon
     * call to getMemDepViolator().
     */
    bool violation() { return memDepViolator; }

    /** Returns the memory ordering violator. */
    DynInstPtr getMemDepViolator();

    /** Returns if a load became blocked due to the memory system.  The
     * flag remains set until clearLoadBlocked() is called.
     */
    bool loadBlocked()
    { return isLoadBlocked; }

    /** Clears the load-blocked flag. */
    void clearLoadBlocked()
    { isLoadBlocked = false; }

    /** Returns if the blocked load has already been handled. */
    bool isLoadBlockedHandled()
    { return loadBlockedHandled; }

    /** Marks the blocked load as handled. */
    void setLoadBlockedHandled()
    { loadBlockedHandled = true; }

    /** Returns the number of free entries (min of free LQ and SQ entries). */
    unsigned numFreeEntries();

    /** Returns the number of loads ready to execute. */
    int numLoadsReady();

    /** Returns the number of loads in the LQ. */
    int numLoads() { return loads; }

    /** Returns the number of stores in the SQ. */
    int numStores() { return stores; }

    /** Returns if either the LQ or SQ is full. */
    bool isFull() { return lqFull() || sqFull(); }

    /** Returns if the LQ is full. */
    bool lqFull() { return loads >= (LQEntries - 1); }

    /** Returns if the SQ is full. */
    bool sqFull() { return stores >= (SQEntries - 1); }

    /** Debugging function to dump instructions in the LSQ. */
    void dumpInsts();

    /** Returns the number of instructions in the LSQ. */
    unsigned getCount() { return loads + stores; }

    /** Returns if there are any stores to writeback. */
    bool hasStoresToWB() { return storesToWB; }

    /** Returns the number of stores to writeback. */
    int numStoresToWB() { return storesToWB; }

    /** Returns if the LSQ unit will writeback on this cycle.  Note that
     * storeQueue.back() is the oldest (head) store; the front of the list
     * holds the youngest entries (see getStoreHeadSeqNum()).
     */
    bool willWB() { return storeQueue.back().canWB &&
                        !storeQueue.back().completed/* &&
                        !dcacheInterface->isBlocked()*/; }

    /** Switches out the LSQ unit. */
    void switchOut();

    /** Takes over state from another LSQ unit after a CPU switch. */
    void takeOverFrom(ThreadContext *old_tc = NULL);

    /** Returns whether the LSQ unit is switched out. */
    bool isSwitchedOut() { return switchedOut; }

    /** Whether the LSQ unit is currently switched out. */
    bool switchedOut;

  private:
    /** Completes the store at the specified index. */
    void completeStore(int store_idx);

  private:
    /** Pointer to the CPU. */
    FullCPU *cpu;

    /** Pointer to the back-end stage. */
    BackEnd *be;

    /** Pointer to the memory object. */
    MemObject *mem;

    /** Port used by this LSQ unit for all data-cache accesses. */
    class DcachePort : public Port
    {
      protected:
        /** Pointer to the owning CPU. */
        FullCPU *cpu;

      public:
        DcachePort(const std::string &_name, FullCPU *_cpu)
            : Port(_name), cpu(_cpu)
        { }

      protected:
        virtual Tick recvAtomic(PacketPtr pkt);

        virtual void recvFunctional(PacketPtr pkt);

        virtual void recvStatusChange(Status status);

        /** A CPU-side port owns no address ranges. */
        virtual void getDeviceAddressRanges(AddrRangeList &resp,
                                            AddrRangeList &snoop)
        { resp.clear(); snoop.clear(); }

        virtual bool recvTiming(PacketPtr pkt);

        virtual void recvRetry();
    };

    /** Pointer to the D-cache. */
    DcachePort dcachePort;

    /** Pointer to the page table. */
    // PageTable *pTable;

  public:
    /** Entry in the store queue: the store instruction plus its request,
     * data, and writeback/commit/completion state.
     */
    struct SQEntry {
        /** Constructs an empty store queue entry. */
        SQEntry()
            : inst(NULL), req(NULL), size(0), data(0),
              canWB(0), committed(0), completed(0), lqIt(NULL)
        { }

        /** Constructs a store queue entry for a given instruction. */
        SQEntry(DynInstPtr &_inst)
            : inst(_inst), req(NULL), size(0), data(0),
              canWB(0), committed(0), completed(0), lqIt(NULL)
        { }

        /** The store instruction. */
        DynInstPtr inst;
        /** The memory request for the store. */
        RequestPtr req;
        /** The size of the store. */
        int size;
        /** The store data. */
        IntReg data;
        /** Whether or not the store can writeback. */
        bool canWB;
        /** Whether or not the store is committed. */
        bool committed;
        /** Whether or not the store is completed. */
        bool completed;

        // NOTE(review): constructing a std::list iterator from NULL (see
        // the ctors above) is not portable; it relies on the library
        // implementation accepting it.
        typename std::list<DynInstPtr>::iterator lqIt;
    };

    /** Overall status of the LSQ unit. */
    enum Status {
        Running,
        Idle,
        DcacheMissStall,
        DcacheMissSwitch
    };

  private:
    /** The OzoneLWLSQ thread id. */
    unsigned lsqID;

    /** The status of the LSQ unit. */
    Status _status;

    /** The store queue. */
    std::list<SQEntry> storeQueue;
    /** The load queue. */
    std::list<DynInstPtr> loadQueue;

    typedef typename std::list<SQEntry>::iterator SQIt;
    typedef typename std::list<DynInstPtr>::iterator LQIt;


    /** Hash functor used to map LQ/SQ indices to list iterators. */
    struct HashFn {
        size_t operator() (const int a) const
        {
            unsigned hash = (((a >> 14) ^ ((a >> 2) & 0xffff))) & 0x7FFFFFFF;

            return hash;
        }
    };

    /** Maps SQ indices to store queue iterators. */
    m5::hash_map<int, SQIt, HashFn> SQItHash;
    /** Pool of free SQ indices. */
    std::queue<int> SQIndices;
    /** Maps LQ indices to load queue iterators. */
    m5::hash_map<int, LQIt, HashFn> LQItHash;
    /** Pool of free LQ indices. */
    std::queue<int> LQIndices;

    typedef typename m5::hash_map<int, LQIt, HashFn>::iterator LQHashIt;
    typedef typename m5::hash_map<int, SQIt, HashFn>::iterator SQHashIt;
    // Consider making these 16 bits
    /** The number of LQ entries. */
    unsigned LQEntries;
    /** The number of SQ entries. */
    unsigned SQEntries;

    /** The number of load instructions in the LQ. */
    int loads;
    /** The number of store instructions in the SQ (excludes those waiting to
     * writeback).
     */
    int stores;

    /** The number of stores waiting to write back. */
    int storesToWB;

    /// @todo Consider moving to a more advanced model with write vs read ports
    /** The number of cache ports available each cycle. */
    int cachePorts;

    /** The number of used cache ports in this cycle. */
    int usedPorts;

    //list<InstSeqNum> mshrSeqNums;

    //Stats::Scalar<> dcacheStallCycles;
    /** Time of the most recent D-cache stall. */
    Counter lastDcacheStall;

    // Make these per thread?
    /** Whether or not the LSQ is stalled. */
    bool stalled;
    /** The store that causes the stall due to partial store to load
     * forwarding.
     */
    InstSeqNum stallingStoreIsn;
    /** Iterator to the load stalled on the above store. */
    LQIt stallingLoad;

    /** Whether or not a load is blocked due to the memory system.  Cleared
     * via clearLoadBlocked().
     */
    bool isLoadBlocked;

    /** Whether the blocked load has been handled. */
    bool loadBlockedHandled;

    /** Sequence number of the blocked load. */
    InstSeqNum blockedLoadSeqNum;

    /** The oldest faulting load instruction. */
    DynInstPtr loadFaultInst;
    /** The oldest faulting store instruction. */
    DynInstPtr storeFaultInst;

    /** The oldest load that caused a memory ordering violation. */
    DynInstPtr memDepViolator;

    // Will also need how many read/write ports the Dcache has.  Or keep track
    // of that in stage that is one level up, and only call executeLoad/Store
    // the appropriate number of times.

  public:
    /** Executes the load at the given index. */
    template <class T>
    Fault read(RequestPtr req, T &data, int load_idx);

    /** Executes the store at the given index. */
    template <class T>
    Fault write(RequestPtr req, T &data, int store_idx);

    /** Returns the sequence number of the head (oldest) load instruction,
     * or 0 if the LQ is empty.  The back of the list is the oldest entry.
     */
    InstSeqNum getLoadHeadSeqNum()
    {
        if (!loadQueue.empty()) {
            return loadQueue.back()->seqNum;
        } else {
            return 0;
        }

    }

    /** Returns the sequence number of the head (oldest) store instruction,
     * or 0 if the SQ is empty.  The back of the list is the oldest entry.
     */
    InstSeqNum getStoreHeadSeqNum()
    {
        if (!storeQueue.empty()) {
            return storeQueue.back().inst->seqNum;
        } else {
            return 0;
        }

    }

    /** Returns whether or not the LSQ unit is stalled. */
    bool isStalled() { return stalled; }
};
453
454 template <class Impl>
455 template <class T>
456 Fault
457 OzoneLWLSQ<Impl>::read(RequestPtr req, T &data, int load_idx)
458 {
459 //Depending on issue2execute delay a squashed load could
460 //execute if it is found to be squashed in the same
461 //cycle it is scheduled to execute
462 typename m5::hash_map<int, LQIt, HashFn>::iterator
463 lq_hash_it = LQItHash.find(load_idx);
464 assert(lq_hash_it != LQItHash.end());
465 DynInstPtr inst = (*(*lq_hash_it).second);
466
467 // Make sure this isn't an uncacheable access
468 // A bit of a hackish way to get uncached accesses to work only if they're
469 // at the head of the LSQ and are ready to commit (at the head of the ROB
470 // too).
471 // @todo: Fix uncached accesses.
472 if (req->getFlags() & UNCACHEABLE &&
473 (inst != loadQueue.back() || !inst->reachedCommit)) {
474 DPRINTF(OzoneLSQ, "[sn:%lli] Uncached load and not head of "
475 "commit/LSQ!\n",
476 inst->seqNum);
477 be->rescheduleMemInst(inst);
478 return TheISA::genMachineCheckFault();
479 }
480
481 // Check the SQ for any previous stores that might lead to forwarding
482 SQIt sq_it = storeQueue.begin();
483 int store_size = 0;
484
485 DPRINTF(OzoneLSQ, "Read called, load idx: %i addr: %#x\n",
486 load_idx, req->getPaddr());
487
488 while (sq_it != storeQueue.end() && (*sq_it).inst->seqNum > inst->seqNum)
489 ++sq_it;
490
491 while (1) {
492 // End once we've reached the top of the LSQ
493 if (sq_it == storeQueue.end()) {
494 break;
495 }
496
497 assert((*sq_it).inst);
498
499 store_size = (*sq_it).size;
500
501 if (store_size == 0) {
502 sq_it++;
503 continue;
504 }
505
506 // Check if the store data is within the lower and upper bounds of
507 // addresses that the request needs.
508 bool store_has_lower_limit =
509 req->getVaddr() >= (*sq_it).inst->effAddr;
510 bool store_has_upper_limit =
511 (req->getVaddr() + req->getSize()) <= ((*sq_it).inst->effAddr +
512 store_size);
513 bool lower_load_has_store_part =
514 req->getVaddr() < ((*sq_it).inst->effAddr +
515 store_size);
516 bool upper_load_has_store_part =
517 (req->getVaddr() + req->getSize()) > (*sq_it).inst->effAddr;
518
519 // If the store's data has all of the data needed, we can forward.
520 if (store_has_lower_limit && store_has_upper_limit) {
521 int shift_amt = req->getVaddr() & (store_size - 1);
522 // Assumes byte addressing
523 shift_amt = shift_amt << 3;
524
525 // Cast this to type T?
526 data = (*sq_it).data >> shift_amt;
527
528 assert(!inst->memData);
529 inst->memData = new uint8_t[64];
530
531 memcpy(inst->memData, &data, req->getSize());
532
533 DPRINTF(OzoneLSQ, "Forwarding from store [sn:%lli] to load to "
534 "[sn:%lli] addr %#x, data %#x\n",
535 (*sq_it).inst->seqNum, inst->seqNum, req->vaddr, *(inst->memData));
536 /*
537 typename BackEnd::LdWritebackEvent *wb =
538 new typename BackEnd::LdWritebackEvent(inst,
539 be);
540
541 // We'll say this has a 1 cycle load-store forwarding latency
542 // for now.
543 // FIXME - Need to make this a parameter.
544 wb->schedule(curTick);
545 */
546 // Should keep track of stat for forwarded data
547 return NoFault;
548 } else if ((store_has_lower_limit && lower_load_has_store_part) ||
549 (store_has_upper_limit && upper_load_has_store_part) ||
550 (lower_load_has_store_part && upper_load_has_store_part)) {
551 // This is the partial store-load forwarding case where a store
552 // has only part of the load's data.
553
554 // If it's already been written back, then don't worry about
555 // stalling on it.
556 if ((*sq_it).completed) {
557 sq_it++;
558 break;
559 }
560
561 // Must stall load and force it to retry, so long as it's the oldest
562 // load that needs to do so.
563 if (!stalled ||
564 (stalled &&
565 inst->seqNum <
566 (*stallingLoad)->seqNum)) {
567 stalled = true;
568 stallingStoreIsn = (*sq_it).inst->seqNum;
569 stallingLoad = (*lq_hash_it).second;
570 }
571
572 // Tell IQ/mem dep unit that this instruction will need to be
573 // rescheduled eventually
574 be->rescheduleMemInst(inst);
575
576 DPRINTF(OzoneLSQ, "Load-store forwarding mis-match. "
577 "Store [sn:%lli] to load addr %#x\n",
578 (*sq_it).inst->seqNum, req->vaddr);
579
580 return NoFault;
581 }
582 sq_it++;
583 }
584
585 // If there's no forwarding case, then go access memory
586 DPRINTF(OzoneLSQ, "Doing functional access for inst PC %#x\n",
587 inst->readPC());
588
589 assert(!inst->memData);
590 inst->memData = new uint8_t[64];
591
592 ++usedPorts;
593
594 DPRINTF(OzoneLSQ, "Doing timing access for inst PC %#x\n",
595 inst->readPC());
596
597 PacketPtr data_pkt = new Packet(req, Packet::ReadReq, Packet::Broadcast);
598 data_pkt->dataStatic(inst->memData);
599
600 // if we have a cache, do cache access too
601 if (!dcachePort.sendTiming(data_pkt)) {
602 // There's an older load that's already going to squash.
603 if (isLoadBlocked && blockedLoadSeqNum < inst->seqNum)
604 return NoFault;
605
606 // Record that the load was blocked due to memory. This
607 // load will squash all instructions after it, be
608 // refetched, and re-executed.
609 isLoadBlocked = true;
610 loadBlockedHandled = false;
611 blockedLoadSeqNum = inst->seqNum;
612 // No fault occurred, even though the interface is blocked.
613 return NoFault;
614 }
615
616 if (data_pkt->result != Packet::Success) {
617 DPRINTF(OzoneLSQ, "OzoneLSQ: D-cache miss!\n");
618 DPRINTF(Activity, "Activity: ld accessing mem miss [sn:%lli]\n",
619 inst->seqNum);
620 } else {
621 DPRINTF(OzoneLSQ, "OzoneLSQ: D-cache hit!\n");
622 DPRINTF(Activity, "Activity: ld accessing mem hit [sn:%lli]\n",
623 inst->seqNum);
624 }
625
626 return NoFault;
627 }
628
629 template <class Impl>
630 template <class T>
631 Fault
632 OzoneLWLSQ<Impl>::write(RequestPtr req, T &data, int store_idx)
633 {
634 SQHashIt sq_hash_it = SQItHash.find(store_idx);
635 assert(sq_hash_it != SQItHash.end());
636
637 SQIt sq_it = (*sq_hash_it).second;
638 assert((*sq_it).inst);
639
640 DPRINTF(OzoneLSQ, "Doing write to store idx %i, addr %#x data %#x"
641 " | [sn:%lli]\n",
642 store_idx, req->getPaddr(), data, (*sq_it).inst->seqNum);
643
644 (*sq_it).req = req;
645 (*sq_it).size = sizeof(T);
646 (*sq_it).data = data;
647 /*
648 assert(!req->data);
649 req->data = new uint8_t[64];
650 memcpy(req->data, (uint8_t *)&(*sq_it).data, req->size);
651 */
652
653 // This function only writes the data to the store queue, so no fault
654 // can happen here.
655 return NoFault;
656 }
657
658 #endif // __CPU_OZONE_LW_LSQ_HH__