8debd277dfa43c49831f1dd932235c0fa3123b60
[gem5.git] / src / cpu / ozone / back_end.hh
1 /*
2 * Copyright (c) 2006 The Regents of The University of Michigan
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 *
28 * Authors: Kevin Lim
29 */
30
31 #ifndef __CPU_OZONE_BACK_END_HH__
32 #define __CPU_OZONE_BACK_END_HH__
33
34 #include <list>
35 #include <queue>
36 #include <string>
37
38 #include "arch/faults.hh"
39 #include "base/timebuf.hh"
40 #include "cpu/inst_seq.hh"
41 #include "cpu/ozone/rename_table.hh"
42 #include "cpu/ozone/thread_state.hh"
43 #include "mem/request.hh"
44 #include "sim/eventq.hh"
45
46 class ThreadContext;
47
/**
 * The back end of the Ozone CPU model.  Receives instructions from the
 * front end and carries them through dispatch, issue, execute,
 * writeback, and commit, using an instruction queue (IQ) for scheduling
 * and a load/store queue (LSQ) for memory instructions.  Parameterized
 * on Impl, which supplies the CPU, front end, dynamic instruction, and
 * LSQ types.
 */
template <class Impl>
class BackEnd
{
  public:
    /** Per-thread state (rename tables, arch state) for this CPU model. */
    typedef OzoneThreadState<Impl> Thread;

    typedef typename Impl::Params Params;
    typedef typename Impl::DynInst DynInst;
    typedef typename Impl::DynInstPtr DynInstPtr;
    typedef typename Impl::FullCPU FullCPU;
    typedef typename Impl::FrontEnd FrontEnd;
    /** Struct used to communicate with the rest of the CPU (commit). */
    typedef typename Impl::FullCPU::CommStruct CommStruct;

    /** Minimal inter-stage payload: just an instruction count. */
    struct SizeStruct {
        int size;
    };

    // All inter-stage time buffers carry only a count.
    typedef SizeStruct DispatchToIssue;
    typedef SizeStruct IssueToExec;
    typedef SizeStruct ExecToCommit;
    typedef SizeStruct Writeback;

    /** Dispatch-to-issue delay queue and its access wire. */
    TimeBuffer<DispatchToIssue> d2i;
    typename TimeBuffer<DispatchToIssue>::wire instsToDispatch;
    /** Issue-to-execute delay queue and its access wire. */
    TimeBuffer<IssueToExec> i2e;
    typename TimeBuffer<IssueToExec>::wire instsToExecute;
    /** Execute-to-commit delay queue. */
    TimeBuffer<ExecToCommit> e2c;
    /** Queue of counts of instructions awaiting writeback. */
    TimeBuffer<Writeback> numInstsToWB;

    /** Communication buffer shared with the rest of the CPU; set via
     * setCommBuffer().  toIEW writes forward, fromCommit reads back. */
    TimeBuffer<CommStruct> *comm;
    typename TimeBuffer<CommStruct>::wire toIEW;
    typename TimeBuffer<CommStruct>::wire fromCommit;

    /**
     * Instruction queue for the back end.  Tracks instructions on
     * several internal lists (non-speculative, waiting, ready, replay)
     * and hands ready instructions to execute through the issue-to-
     * execute time buffer.
     */
    class InstQueue {
        /** Identifies which internal list an instruction resides on
         * (used with find()). */
        enum queue {
            NonSpec,
            IQ,
            ToBeScheduled,
            ReadyList,
            ReplayList
        };
        /** Comparator for the ready priority queue: greater-than on
         * seqNum makes the OLDEST (lowest seqNum) instruction come out
         * of the std::priority_queue first. */
        struct pqCompare {
            bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const
            {
                return lhs->seqNum > rhs->seqNum;
            }
        };
      public:
        InstQueue(Params *params);

        /** Returns the name of the queue, for tracing/stats. */
        std::string name() const;

        /** Registers statistics. */
        void regStats();

        /** Provides the issue-to-execute time buffer to write counts into. */
        void setIssueExecQueue(TimeBuffer<IssueToExec> *i2e_queue);

        /** Sets the back-pointer to the owning back end. */
        void setBE(BackEnd *_be) { be = _be; }

        /** Inserts an instruction into the queue. */
        void insert(DynInstPtr &inst);

        /** Schedules instructions whose operands are ready. */
        void scheduleReadyInsts();

        /** Allows a non-speculative instruction (by seq num) to execute. */
        void scheduleNonSpec(const InstSeqNum &sn);

        /** Returns the next ready instruction, if any. */
        DynInstPtr getReadyInst();

        /** Commit notification; nothing to do in this queue. */
        void commit(const InstSeqNum &sn) {}

        /** Squashes all instructions younger than the given seq num. */
        void squash(const InstSeqNum &sn);

        /** Wakes consumers of this instruction's results; returns the
         * number of dependents woken. */
        int wakeDependents(DynInstPtr &inst);

        /** Tells memory dependence unit that a memory instruction needs to be
         * rescheduled. It will re-execute once replayMemInst() is called.
         */
        void rescheduleMemInst(DynInstPtr &inst);

        /** Re-executes all rescheduled memory instructions. */
        void replayMemInst(DynInstPtr &inst);

        /** Completes memory instruction. */
        void completeMemInst(DynInstPtr &inst);

        /** Memory-order violation notification; intentionally a no-op here. */
        void violation(DynInstPtr &inst, DynInstPtr &violation) { }

        /** True when the queue holds at least `size` instructions. */
        bool isFull() { return numInsts >= size; }

        /** Debug dump of all queued instructions. */
        void dumpInsts();

      private:
        /** Checks whether the given iterator is on the given list. */
        bool find(queue q, typename std::list<DynInstPtr>::iterator it);
        /** Owning back end. */
        BackEnd *be;
        /** Issue-to-execute time buffer (not owned). */
        TimeBuffer<IssueToExec> *i2e;
        typename TimeBuffer<IssueToExec>::wire numIssued;
        typedef typename std::list<DynInstPtr> InstList;
        typedef typename std::list<DynInstPtr>::iterator InstListIt;
        typedef typename std::priority_queue<DynInstPtr, std::vector<DynInstPtr>, pqCompare> ReadyInstQueue;
        // Not sure I need the IQ list; it just needs to be a count.
        InstList iq;
        InstList toBeScheduled;
        InstList readyList;
        InstList nonSpec;
        InstList replayList;
        /** Ready instructions ordered oldest-first (see pqCompare). */
        ReadyInstQueue readyQueue;
      public:
        /** Capacity of the queue. */
        int size;
        /** Current number of instructions in the queue. */
        int numInsts;
        /** Maximum instructions issued per cycle. */
        int width;

        /** Occupancy distribution. */
        Stats::VectorDistribution<> occ_dist;

        Stats::Vector<> inst_count;
        Stats::Vector<> peak_inst_count;
        Stats::Scalar<> empty_count;
        Stats::Scalar<> current_count;
        Stats::Scalar<> fullCount;

        Stats::Formula occ_rate;
        Stats::Formula avg_residency;
        Stats::Formula empty_rate;
        Stats::Formula full_rate;
    };

    /** LdWriteback event for a load completion. */
    class LdWritebackEvent : public Event {
      private:
        /** Instruction that is writing back data to the register file. */
        DynInstPtr inst;
        /** Pointer to IEW stage. */
        BackEnd *be;

      public:
        /** Constructs a load writeback event. */
        LdWritebackEvent(DynInstPtr &_inst, BackEnd *be);

        /** Processes writeback event. */
        virtual void process();
        /** Returns the description of the writeback event. */
        virtual const char *description();
    };

    BackEnd(Params *params);

    /** Returns the name of the back end, for tracing/stats. */
    std::string name() const;

    /** Registers statistics. */
    void regStats();

    /** Sets the pointer to the owning CPU. */
    void setCPU(FullCPU *cpu_ptr)
    { cpu = cpu_ptr; }

    /** Sets the pointer to the front end feeding this back end. */
    void setFrontEnd(FrontEnd *front_end_ptr)
    { frontEnd = front_end_ptr; }

    /** Sets the thread context pointer. */
    void setTC(ThreadContext *tc_ptr)
    { tc = tc_ptr; }

    /** Sets the per-thread state pointer. */
    void setThreadState(Thread *thread_ptr)
    { thread = thread_ptr; }

    /** Sets the communication buffer shared with the rest of the CPU. */
    void setCommBuffer(TimeBuffer<CommStruct> *_comm);

    /** Ticks the back end: runs one cycle of all back-end stages. */
    void tick();
    /** Squashes all in-flight back-end state. */
    void squash();
    /** Squashes due to an external thread-context change. */
    void squashFromTC();
    /** Set when a TC-initiated squash is pending. */
    bool tcSquash;

    /** Initiates a load through the LSQ; see definition below. */
    template <class T>
    Fault read(RequestPtr req, T &data, int load_idx);

    /** Initiates a store through the LSQ; see definition below. */
    template <class T>
    Fault write(RequestPtr req, T &data, int store_idx);

    /** Returns the PC of the instruction currently committing. */
    Addr readCommitPC() { return commitPC; }

    Addr commitPC;

    /** True when no instructions remain in the reorder buffer. */
    bool robEmpty() { return instList.empty(); }

    /** True when the ROB has no free entries. */
    bool isFull() { return numInsts >= numROBEntries; }
    /** True when either the back end or dispatch is blocked. */
    bool isBlocked() { return status == Blocked || dispatchStatus == Blocked; }

    /** Tells memory dependence unit that a memory instruction needs to be
     * rescheduled. It will re-execute once replayMemInst() is called.
     */
    void rescheduleMemInst(DynInstPtr &inst)
    { IQ.rescheduleMemInst(inst); }

    /** Re-executes all rescheduled memory instructions. */
    void replayMemInst(DynInstPtr &inst)
    { IQ.replayMemInst(inst); }

    /** Completes memory instruction. */
    void completeMemInst(DynInstPtr &inst)
    { IQ.completeMemInst(inst); }

    /** Records a fault delivered from the front end (fetch). */
    void fetchFault(Fault &fault);

  private:
    // One private helper per back-end pipeline activity; called from tick().
    void updateStructures();
    void dispatchInsts();
    void dispatchStall();
    void checkDispatchStatus();
    void scheduleReadyInsts();
    void executeInsts();
    void commitInsts();
    void addToIQ(DynInstPtr &inst);
    void addToLSQ(DynInstPtr &inst);
    void instToCommit(DynInstPtr &inst);
    void writebackInsts();
    bool commitInst(int inst_num);
    void squash(const InstSeqNum &sn);
    void squashDueToBranch(DynInstPtr &inst);
    void squashDueToMemBlocked(DynInstPtr &inst);
    void updateExeInstStats(DynInstPtr &inst);
    void updateComInstStats(DynInstPtr &inst);

  public:
    /** Owning CPU. */
    FullCPU *cpu;

    /** Front end that feeds instructions to this back end. */
    FrontEnd *frontEnd;

    /** Thread context of the running thread. */
    ThreadContext *tc;

    /** Per-thread state. */
    Thread *thread;

    /** Overall back-end status values. */
    enum Status {
        Running,
        Idle,
        DcacheMissStall,
        DcacheMissComplete,
        Blocked
    };

    /** Status of the back end as a whole. */
    Status status;

    /** Status of the dispatch stage specifically. */
    Status dispatchStatus;

    /** Count of functionally executed instructions. */
    Counter funcExeInst;

  private:
    // typedef typename Impl::InstQueue InstQueue;

    /** Instruction queue used for scheduling. */
    InstQueue IQ;

    typedef typename Impl::LdstQueue LdstQueue;

    /** Load/store queue; owns the entire memory-access path (see
     * read()/write() below). */
    LdstQueue LSQ;
  public:
    /** Rename table reflecting committed (architectural) state. */
    RenameTable<Impl> commitRenameTable;

    /** Rename table reflecting speculative state. */
    RenameTable<Impl> renameTable;
  private:
    /** Event scheduled when an outstanding D-cache access completes. */
    class DCacheCompletionEvent : public Event
    {
      private:
        BackEnd *be;

      public:
        DCacheCompletionEvent(BackEnd *_be);

        virtual void process();
        virtual const char *description();
    };

    friend class DCacheCompletionEvent;

    DCacheCompletionEvent cacheCompletionEvent;

    MemInterface *dcacheInterface;

    Request *memReq;

    // General back end width. Used if the more specific isn't given.
    int width;

    // Dispatch width.
    int dispatchWidth;
    int numDispatchEntries;
    int dispatchSize;

    int issueWidth;

    // Writeback width
    int wbWidth;

    // Commit width
    int commitWidth;

    /** Index into queue of instructions being written back. */
    unsigned wbNumInst;

    /** Cycle number within the queue of instructions being written
     * back. Used in case there are too many instructions writing
     * back at the current cycle and writesbacks need to be scheduled
     * for the future. See comments in instToCommit().
     */
    unsigned wbCycle;

    /** ROB capacity and current occupancy. */
    int numROBEntries;
    int numInsts;

    /** Pending squash information, applied when the squash is handled. */
    bool squashPending;
    InstSeqNum squashSeqNum;
    Addr squashNextPC;

    /** Fault passed in from fetch via fetchFault(). */
    Fault faultFromFetch;

  private:
    typedef typename std::list<DynInstPtr>::iterator InstListIt;

    /** In-flight instructions, in program order (acts as the ROB). */
    std::list<DynInstPtr> instList;
    /** Instructions waiting to dispatch. */
    std::list<DynInstPtr> dispatch;
    /** Instructions waiting to write back. */
    std::list<DynInstPtr> writeback;

    int latency;

    int squashLatency;

    bool exactFullStall;

    bool fetchRedirect[Impl::MaxThreads];

    // number of cycles stalled for D-cache misses
    /*  Stats::Scalar<> dcacheStallCycles;
    Counter lastDcacheStall;
    */
    Stats::Vector<> rob_cap_events;
    Stats::Vector<> rob_cap_inst_count;
    Stats::Vector<> iq_cap_events;
    Stats::Vector<> iq_cap_inst_count;
    // total number of instructions executed
    Stats::Vector<> exe_inst;
    Stats::Vector<> exe_swp;
    Stats::Vector<> exe_nop;
    Stats::Vector<> exe_refs;
    Stats::Vector<> exe_loads;
    Stats::Vector<> exe_branches;

    Stats::Vector<> issued_ops;

    // total number of loads forwaded from LSQ stores
    Stats::Vector<> lsq_forw_loads;

    // total number of loads ignored due to invalid addresses
    Stats::Vector<> inv_addr_loads;

    // total number of software prefetches ignored due to invalid addresses
    Stats::Vector<> inv_addr_swpfs;
    // ready loads blocked due to memory disambiguation
    Stats::Vector<> lsq_blocked_loads;

    Stats::Scalar<> lsqInversion;

    Stats::Vector<> n_issued_dist;
    Stats::VectorDistribution<> issue_delay_dist;

    Stats::VectorDistribution<> queue_res_dist;
/*
    Stats::Vector<> stat_fu_busy;
    Stats::Vector2d<> stat_fuBusy;
    Stats::Vector<> dist_unissued;
    Stats::Vector2d<> stat_issued_inst_type;

    Stats::Formula misspec_cnt;
    Stats::Formula misspec_ipc;
    Stats::Formula issue_rate;
    Stats::Formula issue_stores;
    Stats::Formula issue_op_rate;
    Stats::Formula fu_busy_rate;
    Stats::Formula commit_stores;
    Stats::Formula commit_ipc;
    Stats::Formula commit_ipb;
    Stats::Formula lsq_inv_rate;
*/
    Stats::Vector<> writeback_count;
    Stats::Vector<> producer_inst;
    Stats::Vector<> consumer_inst;
    Stats::Vector<> wb_penalized;

    Stats::Formula wb_rate;
    Stats::Formula wb_fanout;
    Stats::Formula wb_penalized_rate;

    // total number of instructions committed
    Stats::Vector<> stat_com_inst;
    Stats::Vector<> stat_com_swp;
    Stats::Vector<> stat_com_refs;
    Stats::Vector<> stat_com_loads;
    Stats::Vector<> stat_com_membars;
    Stats::Vector<> stat_com_branches;

    Stats::Distribution<> n_committed_dist;

    Stats::Scalar<> commit_eligible_samples;
    Stats::Vector<> commit_eligible;

    Stats::Scalar<> ROB_fcount;
    Stats::Formula ROB_full_rate;

    Stats::Vector<> ROB_count;   // cumulative ROB occupancy
    Stats::Formula ROB_occ_rate;
    Stats::VectorDistribution<> ROB_occ_dist;
  public:
    /** Debug dump of all in-flight instructions. */
    void dumpInsts();
};
456
457 template <class Impl>
458 template <class T>
459 Fault
460 BackEnd<Impl>::read(RequestPtr req, T &data, int load_idx)
461 {
462 /* memReq->reset(addr, sizeof(T), flags);
463
464 // translate to physical address
465 Fault fault = cpu->translateDataReadReq(memReq);
466
467 // if we have a cache, do cache access too
468 if (fault == NoFault && dcacheInterface) {
469 memReq->cmd = Read;
470 memReq->completionEvent = NULL;
471 memReq->time = curTick;
472 memReq->flags &= ~INST_READ;
473 MemAccessResult result = dcacheInterface->access(memReq);
474
475 // Ugly hack to get an event scheduled *only* if the access is
476 // a miss. We really should add first-class support for this
477 // at some point.
478 if (result != MA_HIT && dcacheInterface->doEvents()) {
479 // Fix this hack for keeping funcExeInst correct with loads that
480 // are executed twice.
481 --funcExeInst;
482
483 memReq->completionEvent = &cacheCompletionEvent;
484 lastDcacheStall = curTick;
485 // unscheduleTickEvent();
486 // status = DcacheMissStall;
487 DPRINTF(OzoneCPU, "Dcache miss stall!\n");
488 } else {
489 // do functional access
490 fault = thread->mem->read(memReq, data);
491
492 }
493 }
494 */
495 /*
496 if (!dcacheInterface && (memReq->isUncacheable()))
497 recordEvent("Uncached Read");
498 */
499 return LSQ.read(req, data, load_idx);
500 }
501
502 template <class Impl>
503 template <class T>
504 Fault
505 BackEnd<Impl>::write(RequestPtr req, T &data, int store_idx)
506 {
507 /*
508 memReq->reset(addr, sizeof(T), flags);
509
510 // translate to physical address
511 Fault fault = cpu->translateDataWriteReq(memReq);
512
513 if (fault == NoFault && dcacheInterface) {
514 memReq->cmd = Write;
515 memcpy(memReq->data,(uint8_t *)&data,memReq->size);
516 memReq->completionEvent = NULL;
517 memReq->time = curTick;
518 memReq->flags &= ~INST_READ;
519 MemAccessResult result = dcacheInterface->access(memReq);
520
521 // Ugly hack to get an event scheduled *only* if the access is
522 // a miss. We really should add first-class support for this
523 // at some point.
524 if (result != MA_HIT && dcacheInterface->doEvents()) {
525 memReq->completionEvent = &cacheCompletionEvent;
526 lastDcacheStall = curTick;
527 // unscheduleTickEvent();
528 // status = DcacheMissStall;
529 DPRINTF(OzoneCPU, "Dcache miss stall!\n");
530 }
531 }
532
533 if (res && (fault == NoFault))
534 *res = memReq->result;
535 */
536 /*
537 if (!dcacheInterface && (memReq->isUncacheable()))
538 recordEvent("Uncached Write");
539 */
540 return LSQ.write(req, data, store_idx);
541 }
542
543 #endif // __CPU_OZONE_BACK_END_HH__