2 * Copyright 2014 Google, Inc.
3 * Copyright (c) 2012-2013,2015,2017-2020 ARM Limited
6 * The license below extends only to copyright in the software and shall
7 * not be construed as granting a license to any other intellectual
8 * property including but not limited to intellectual property relating
9 * to a hardware implementation of the functionality of the software
10 * licensed hereunder. You may use the software subject to the license
11 * terms below provided that you ensure that this notice is replicated
12 * unmodified and in its entirety in all distributions of the software,
13 * modified or unmodified, in source code or in binary form.
15 * Copyright (c) 2002-2005 The Regents of The University of Michigan
16 * All rights reserved.
18 * Redistribution and use in source and binary forms, with or without
19 * modification, are permitted provided that the following conditions are
20 * met: redistributions of source code must retain the above copyright
21 * notice, this list of conditions and the following disclaimer;
22 * redistributions in binary form must reproduce the above copyright
23 * notice, this list of conditions and the following disclaimer in the
24 * documentation and/or other materials provided with the distribution;
25 * neither the name of the copyright holders nor the names of its
26 * contributors may be used to endorse or promote products derived from
27 * this software without specific prior written permission.
29 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
30 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
31 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
32 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
33 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
34 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
35 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
36 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
37 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
38 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
39 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
42 #include "cpu/simple/atomic.hh"
44 #include "arch/locked_mem.hh"
45 #include "arch/utility.hh"
46 #include "base/output.hh"
47 #include "config/the_isa.hh"
48 #include "cpu/exetrace.hh"
49 #include "cpu/utils.hh"
50 #include "debug/Drain.hh"
51 #include "debug/ExecFaulting.hh"
52 #include "debug/SimpleCPU.hh"
53 #include "mem/packet.hh"
54 #include "mem/packet_access.hh"
55 #include "mem/physical.hh"
56 #include "params/AtomicSimpleCPU.hh"
57 #include "sim/faults.hh"
58 #include "sim/full_system.hh"
59 #include "sim/system.hh"
62 using namespace TheISA
;
65 AtomicSimpleCPU::init()
67 BaseSimpleCPU::init();
69 int cid
= threadContexts
[0]->contextId();
70 ifetch_req
->setContext(cid
);
71 data_read_req
->setContext(cid
);
72 data_write_req
->setContext(cid
);
73 data_amo_req
->setContext(cid
);
// Constructor: wires up the tick event, copies the execution width and the
// stall-simulation knobs from the params object, and names the I/D cache
// ports after this CPU instance.
// NOTE(review): this extract has elided lines (gaps in the original
// numbering, e.g. the base-class initializer before "tickEvent(...)") —
// confirm against the full file.
76 AtomicSimpleCPU::AtomicSimpleCPU(AtomicSimpleCPUParams
*p
)
78 tickEvent([this]{ tick(); }, "AtomicSimpleCPU tick",
79 false, Event::CPU_Tick_Pri
),
80 width(p
->width
), locked(false),
81 simulate_data_stalls(p
->simulate_data_stalls
),
82 simulate_inst_stalls(p
->simulate_inst_stalls
),
83 icachePort(name() + ".icache_port", this),
84 dcachePort(name() + ".dcache_port", this),
85 dcache_access(false), dcache_latency(0),
// Pre-allocate one Request object per access type; they are reused for
// every access (init() and tick() set their context IDs).
89 ifetch_req
= std::make_shared
<Request
>();
90 data_read_req
= std::make_shared
<Request
>();
91 data_write_req
= std::make_shared
<Request
>();
92 data_amo_req
= std::make_shared
<Request
>();
96 AtomicSimpleCPU::~AtomicSimpleCPU()
98 if (tickEvent
.scheduled()) {
99 deschedule(tickEvent
);
// Drain hook: stop ticking and report whether the CPU is already quiesced.
// Returns Drained when nothing more is needed, Draining when the CPU is
// mid-instruction and will signal completion later (see tryCompleteDrain).
104 AtomicSimpleCPU::drain()
106 // Deschedule any power gating event (if any)
107 deschedulePowerGatingEvent();
// NOTE(review): a guard appears to have been elided before this early
// return (original lines 108-111 are missing from this extract) — confirm
// against the full file.
110 return DrainState::Drained
;
// Mid-instruction (e.g. microcode in flight): finish first, signal later.
112 if (!isCpuDrained()) {
113 DPRINTF(Drain
, "Requesting drain.\n");
114 return DrainState::Draining
;
// Already quiesced: cancel the tick event and clear the active-thread list.
116 if (tickEvent
.scheduled())
117 deschedule(tickEvent
);
119 activeThreads
.clear();
120 DPRINTF(Drain
, "Not executing microcode, no need to drain.\n");
121 return DrainState::Drained
;
// Forward a packet produced by one of this CPU's own hardware threads to
// the other threads' address monitors and LL/SC bookkeeping.
// NOTE(review): lines are elided in this extract (gaps in the original
// numbering), including whatever guards the handleLockedSnoop call and any
// use of `sender` — confirm against the full file.
126 AtomicSimpleCPU::threadSnoop(PacketPtr pkt
, ThreadID sender
)
128 DPRINTF(SimpleCPU
, "%s received snoop pkt for addr:%#x %s\n",
129 __func__
, pkt
->getAddr(), pkt
->cmdString());
// Check every hardware thread's address monitor against this packet.
131 for (ThreadID tid
= 0; tid
< numThreads
; tid
++) {
133 if (getCpuAddrMonitor(tid
)->doMonitor(pkt
)) {
// Clear LL/SC reservations that match the snooped cache block.
137 TheISA::handleLockedSnoop(threadInfo
[tid
]->thread
,
138 pkt
, dcachePort
.cacheBlockMask
);
// Resume after a drain: recompute which threads are active, restart the
// tick event if any are, and mark the rest idle.
// NOTE(review): guard lines (e.g. a switched-out check) appear elided in
// this extract — confirm against the full file.
144 AtomicSimpleCPU::drainResume()
146 assert(!tickEvent
.scheduled());
150 DPRINTF(SimpleCPU
, "Resume\n");
153 assert(!threadContexts
.empty());
// Assume idle until an active thread is found in the loop below.
155 _status
= BaseSimpleCPU::Idle
;
157 for (ThreadID tid
= 0; tid
< numThreads
; tid
++) {
158 if (threadInfo
[tid
]->thread
->status() == ThreadContext::Active
) {
159 threadInfo
[tid
]->notIdleFraction
= 1;
160 activeThreads
.push_back(tid
);
161 _status
= BaseSimpleCPU::Running
;
163 // Tick if any threads active
164 if (!tickEvent
.scheduled()) {
165 schedule(tickEvent
, nextCycle());
// Inactive thread: contributes nothing to the not-idle statistic.
168 threadInfo
[tid
]->notIdleFraction
= 0;
172 // Reschedule any power gating event (if any)
173 schedulePowerGatingEvent();
// If a drain is pending and the CPU has quiesced, signal drain completion.
// NOTE(review): the return statements and the isCpuDrained() check are
// elided in this extract — only the state check and DPRINTFs remain.
177 AtomicSimpleCPU::tryCompleteDrain()
179 if (drainState() != DrainState::Draining
)
182 DPRINTF(Drain
, "tryCompleteDrain.\n");
186 DPRINTF(Drain
, "CPU done draining, processing drain event\n");
194 AtomicSimpleCPU::switchOut()
196 BaseSimpleCPU::switchOut();
198 assert(!tickEvent
.scheduled());
199 assert(_status
== BaseSimpleCPU::Running
|| _status
== Idle
);
200 assert(isCpuDrained());
205 AtomicSimpleCPU::takeOverFrom(BaseCPU
*oldCPU
)
207 BaseSimpleCPU::takeOverFrom(oldCPU
);
209 // The tick event should have been descheduled by drain()
210 assert(!tickEvent
.scheduled());
// Startup sanity check: the atomic CPU can only be used with a memory
// system configured for atomic (non-timing) accesses.
// NOTE(review): the tail of the fatal() message literal is elided in this
// extract — confirm the full string against the upstream file.
214 AtomicSimpleCPU::verifyMemoryMode() const
216 if (!system
->isAtomicMode()) {
217 fatal("The atomic CPU requires the memory system to be in "
// Mark a hardware thread runnable: restart the tick event if necessary and
// add the thread to the active list (at most once).
223 AtomicSimpleCPU::activateContext(ThreadID thread_num
)
225 DPRINTF(SimpleCPU
, "ActivateContext %d\n", thread_num
);
227 assert(thread_num
< numThreads
);
229 threadInfo
[thread_num
]->notIdleFraction
= 1;
// Cycles the thread spent suspended (lastActivate - lastSuspend).
// NOTE(review): the statement that consumes `delta` (presumably a cycle
// counter update) is elided in this extract — confirm upstream.
230 Cycles delta
= ticksToCycles(threadInfo
[thread_num
]->thread
->lastActivate
-
231 threadInfo
[thread_num
]->thread
->lastSuspend
);
234 if (!tickEvent
.scheduled()) {
235 //Make sure ticks are still on multiples of cycles
236 schedule(tickEvent
, clockEdge(Cycles(0)));
238 _status
= BaseSimpleCPU::Running
;
// Only add the thread once, even if activated repeatedly.
239 if (std::find(activeThreads
.begin(), activeThreads
.end(), thread_num
)
240 == activeThreads
.end()) {
241 activeThreads
.push_back(thread_num
);
244 BaseCPU::activateContext(thread_num
);
// Suspend a hardware thread: drop it from the active list and, when no
// runnable threads remain, stop the tick event.
249 AtomicSimpleCPU::suspendContext(ThreadID thread_num
)
251 DPRINTF(SimpleCPU
, "SuspendContext %d\n", thread_num
);
253 assert(thread_num
< numThreads
);
254 activeThreads
.remove(thread_num
);
259 assert(_status
== BaseSimpleCPU::Running
);
261 threadInfo
[thread_num
]->notIdleFraction
= 0;
// Last runnable thread gone: stop ticking until a context is reactivated.
263 if (activeThreads
.empty()) {
266 if (tickEvent
.scheduled()) {
267 deschedule(tickEvent
);
271 BaseCPU::suspendContext(thread_num
);
275 AtomicSimpleCPU::sendPacket(RequestPort
&port
, const PacketPtr
&pkt
)
277 return port
.sendAtomic(pkt
);
// Atomic-mode snoop from the memory system: feed the packet to each
// thread's address monitor (monitor/mwait) and, on writes or
// invalidations, clear matching LL/SC reservations on every thread.
281 AtomicSimpleCPU::AtomicCPUDPort::recvAtomicSnoop(PacketPtr pkt
)
283 DPRINTF(SimpleCPU
, "%s received atomic snoop pkt for addr:%#x %s\n",
284 __func__
, pkt
->getAddr(), pkt
->cmdString());
286 // X86 ISA: Snooping an invalidation for monitor/mwait
// The port's owner is the enclosing AtomicSimpleCPU.
287 AtomicSimpleCPU
*cpu
= (AtomicSimpleCPU
*)(&owner
);
289 for (ThreadID tid
= 0; tid
< cpu
->numThreads
; tid
++) {
290 if (cpu
->getCpuAddrMonitor(tid
)->doMonitor(pkt
)) {
295 // if snoop invalidates, release any associated locks
296 // When run without caches, Invalidation packets will not be received
297 // hence we must check if the incoming packets are writes and wakeup
298 // the processor accordingly
299 if (pkt
->isInvalidate() || pkt
->isWrite()) {
300 DPRINTF(SimpleCPU
, "received invalidation for addr:%#x\n",
302 for (auto &t_info
: cpu
->threadInfo
) {
303 TheISA::handleLockedSnoop(t_info
->thread
, pkt
, cacheBlockMask
);
// Functional (debug) snoop path: same monitor and LL/SC bookkeeping as
// recvAtomicSnoop, but only invalidations release reservations here.
311 AtomicSimpleCPU::AtomicCPUDPort::recvFunctionalSnoop(PacketPtr pkt
)
313 DPRINTF(SimpleCPU
, "%s received functional snoop pkt for addr:%#x %s\n",
314 __func__
, pkt
->getAddr(), pkt
->cmdString());
316 // X86 ISA: Snooping an invalidation for monitor/mwait
// The port's owner is the enclosing AtomicSimpleCPU.
317 AtomicSimpleCPU
*cpu
= (AtomicSimpleCPU
*)(&owner
);
318 for (ThreadID tid
= 0; tid
< cpu
->numThreads
; tid
++) {
319 if (cpu
->getCpuAddrMonitor(tid
)->doMonitor(pkt
)) {
324 // if snoop invalidates, release any associated locks
325 if (pkt
->isInvalidate()) {
326 DPRINTF(SimpleCPU
, "received invalidation for addr:%#x\n",
328 for (auto &t_info
: cpu
->threadInfo
) {
329 TheISA::handleLockedSnoop(t_info
->thread
, pkt
, cacheBlockMask
);
// Build the Request for one cache-line-bounded fragment of a larger
// access: frag_size is clamped at the next line boundary, size_left is
// decremented, and the fragment's byte-enable slice (if any) is attached.
// NOTE(review): the return statements and the second std::min operand are
// elided in this extract — confirm against the full file.
335 AtomicSimpleCPU::genMemFragmentRequest(const RequestPtr
& req
, Addr frag_addr
,
336 int size
, Request::Flags flags
,
337 const std::vector
<bool>& byte_enable
,
338 int& frag_size
, int& size_left
) const
340 bool predicate
= true;
// PC of the instruction performing the access (recorded in the request).
341 Addr inst_addr
= threadInfo
[curThread
]->thread
->pcState().instAddr();
// Clamp the fragment so it never crosses a cache-line boundary.
343 frag_size
= std::min(
344 cacheLineSize() - addrBlockOffset(frag_addr
, cacheLineSize()),
346 size_left
-= frag_size
;
348 if (!byte_enable
.empty()) {
349 // Set up byte-enable mask for the current fragment
350 auto it_start
= byte_enable
.begin() + (size
- (frag_size
+ size_left
));
351 auto it_end
= byte_enable
.begin() + (size
- size_left
);
// Only issue the fragment if at least one byte is enabled in its slice.
352 if (isAnyActiveElement(it_start
, it_end
)) {
353 req
->setVirt(frag_addr
, frag_size
, flags
, dataRequestorId(),
355 req
->setByteEnable(std::vector
<bool>(it_start
, it_end
));
// No byte-enable mask supplied: plain full-width fragment.
360 req
->setVirt(frag_addr
, frag_size
, flags
, dataRequestorId(),
362 req
->setByteEnable(std::vector
<bool>());
// Atomic read of `size` bytes at virtual address `addr` into `data`,
// split into cache-line fragments: each fragment is built via
// genMemFragmentRequest, translated, and accessed through the d-cache
// port (or a local accessor), accumulating latency into dcache_latency.
// Returns the first fault encountered, or NoFault.
// NOTE(review): this extract has elided lines (fragment-loop header,
// declarations of predicate/frag_size, several closing braces) — the code
// below is fragmentary.
369 AtomicSimpleCPU::readMem(Addr addr
, uint8_t * data
, unsigned size
,
370 Request::Flags flags
,
371 const std::vector
<bool>& byte_enable
)
373 SimpleExecContext
& t_info
= *threadInfo
[curThread
];
374 SimpleThread
* thread
= t_info
.thread
;
376 // use the CPU's statically allocated read request and packet objects
377 const RequestPtr
&req
= data_read_req
;
// Record the access parameters in the exec trace.
380 traceData
->setMem(addr
, size
, flags
);
384 req
->taskId(taskId());
386 Addr frag_addr
= addr
;
388 int size_left
= size
;
390 Fault fault
= NoFault
;
// Per-fragment work (the loop header is elided in this extract):
393 predicate
= genMemFragmentRequest(req
, frag_addr
, size
, flags
,
394 byte_enable
, frag_size
, size_left
);
396 // translate to physical address
398 fault
= thread
->dtb
->translateAtomic(req
, thread
->getTC(),
402 // Now do the access.
403 if (predicate
&& fault
== NoFault
&&
404 !req
->getFlags().isSet(Request::NO_ACCESS
)) {
405 Packet
pkt(req
, Packet::makeReadCmd(req
));
406 pkt
.dataStatic(data
);
// Local (CPU-internal) accesses bypass the port; both paths add their
// latency into dcache_latency.
408 if (req
->isLocalAccess()) {
409 dcache_latency
+= req
->localAccessor(thread
->getTC(), &pkt
);
411 dcache_latency
+= sendPacket(dcachePort
, &pkt
);
413 dcache_access
= true;
415 assert(!pkt
.isError());
// Record an LL/SC reservation for load-linked reads.
418 TheISA::handleLockedRead(thread
, req
);
422 //If there's a fault, return it
423 if (fault
!= NoFault
) {
424 if (req
->isPrefetch()) {
431 // If we don't need to access further cache lines, stop now.
432 if (size_left
== 0) {
433 if (req
->isLockedRMW() && fault
== NoFault
) {
441 * Set up for accessing the next cache line.
443 frag_addr
+= frag_size
;
445 //Move the pointer we're reading into to the correct location.
// Atomic write of `size` bytes from `data` to virtual address `addr`,
// fragment by fragment; handles store-conditional (LL/SC), swap, and
// cache-block-clean (STORE_NO_DATA) requests. `res` receives the SC/swap
// result. Returns the first fault encountered, or NoFault.
// NOTE(review): elided lines (fragment-loop header, several guards and
// closing braces) make this extract fragmentary.
451 AtomicSimpleCPU::writeMem(uint8_t *data
, unsigned size
, Addr addr
,
452 Request::Flags flags
, uint64_t *res
,
453 const std::vector
<bool>& byte_enable
)
455 SimpleExecContext
& t_info
= *threadInfo
[curThread
];
456 SimpleThread
* thread
= t_info
.thread
;
// All-zero buffer — presumably the data source for STORE_NO_DATA stores;
// the use site is elided in this extract, confirm upstream.
457 static uint8_t zero_array
[64] = {};
461 assert(flags
& Request::STORE_NO_DATA
);
462 // This must be a cache block cleaning request
466 // use the CPU's statically allocated write request and packet objects
467 const RequestPtr
&req
= data_write_req
;
// Record the access parameters in the exec trace.
470 traceData
->setMem(addr
, size
, flags
);
474 req
->taskId(taskId());
476 Addr frag_addr
= addr
;
478 int size_left
= size
;
479 int curr_frag_id
= 0;
481 Fault fault
= NoFault
;
// Per-fragment work (the loop header is elided in this extract):
484 predicate
= genMemFragmentRequest(req
, frag_addr
, size
, flags
,
485 byte_enable
, frag_size
, size_left
);
487 // translate to physical address
489 fault
= thread
->dtb
->translateAtomic(req
, thread
->getTC(),
492 // Now do the access.
493 if (predicate
&& fault
== NoFault
) {
494 bool do_access
= true; // flag to suppress cache access
// NOTE(review): the guard for this branch (presumably the LL/SC
// store-conditional check) is elided in this extract.
497 assert(curr_frag_id
== 0);
499 TheISA::handleLockedWrite(thread
, req
,
500 dcachePort
.cacheBlockMask
);
501 } else if (req
->isSwap()) {
502 assert(curr_frag_id
== 0);
// Conditional swap carries its compare value in the request's extra data.
503 if (req
->isCondSwap()) {
505 req
->setExtraData(*res
);
509 if (do_access
&& !req
->getFlags().isSet(Request::NO_ACCESS
)) {
510 Packet
pkt(req
, Packet::makeWriteCmd(req
));
511 pkt
.dataStatic(data
);
513 if (req
->isLocalAccess()) {
515 req
->localAccessor(thread
->getTC(), &pkt
);
517 dcache_latency
+= sendPacket(dcachePort
, &pkt
);
519 // Notify other threads on this CPU of write
520 threadSnoop(&pkt
, curThread
);
522 dcache_access
= true;
523 assert(!pkt
.isError());
// Swap path: return the old memory value through res (guard elided).
526 assert(res
&& curr_frag_id
== 0);
527 memcpy(res
, pkt
.getConstPtr
<uint8_t>(), size
);
// Non-swap requests report their result (presumably the SC success flag —
// confirm) via the request's extra data.
531 if (res
&& !req
->isSwap()) {
532 *res
= req
->getExtraData();
536 //If there's a fault or we don't need to access a second cache line,
538 if (fault
!= NoFault
|| size_left
== 0)
540 if (req
->isLockedRMW() && fault
== NoFault
) {
541 assert(!req
->isMasked());
545 if (fault
!= NoFault
&& req
->isPrefetch()) {
553 * Set up for accessing the next cache line.
555 frag_addr
+= frag_size
;
557 //Move the pointer we're reading into to the correct location.
// Atomic read-modify-write (AMO): applies `amo_op` to `size` bytes at
// virtual address `addr`, issued as a write with a swap-style command;
// `data` receives the value returned by the operation. AMOs must fit in a
// single cache line. Returns the translation fault, or NoFault.
// NOTE(review): several lines (braces, the translateAtomic mode argument,
// the final return) are elided in this extract.
565 AtomicSimpleCPU::amoMem(Addr addr
, uint8_t* data
, unsigned size
,
566 Request::Flags flags
, AtomicOpFunctorPtr amo_op
)
568 SimpleExecContext
& t_info
= *threadInfo
[curThread
];
569 SimpleThread
* thread
= t_info
.thread
;
571 // use the CPU's statically allocated amo request and packet objects
572 const RequestPtr
&req
= data_amo_req
;
// Record the access parameters in the exec trace.
575 traceData
->setMem(addr
, size
, flags
);
577 //The address of the second part of this access if it needs to be split
578 //across a cache line boundary.
579 Addr secondAddr
= roundDown(addr
+ size
- 1, cacheLineSize());
581 // AMO requests that access across a cache line boundary are not
582 // allowed since the cache does not guarantee AMO ops to be executed
583 // atomically in two cache lines
584 // For ISAs such as x86 that requires AMO operations to work on
585 // accesses that cross cache-line boundaries, the cache needs to be
586 // modified to support locking both cache lines to guarantee the
588 if (secondAddr
> addr
) {
589 panic("AMO request should not access across a cache line boundary\n");
594 req
->taskId(taskId());
// The atomic-op functor travels inside the request (moved, not copied).
595 req
->setVirt(addr
, size
, flags
, dataRequestorId(),
596 thread
->pcState().instAddr(), std::move(amo_op
));
598 // translate to physical address
599 Fault fault
= thread
->dtb
->translateAtomic(req
, thread
->getTC(),
602 // Now do the access.
603 if (fault
== NoFault
&& !req
->getFlags().isSet(Request::NO_ACCESS
)) {
604 // We treat AMO accesses as Write accesses with SwapReq command
605 // data will hold the return data of the AMO access
606 Packet
pkt(req
, Packet::makeWriteCmd(req
));
607 pkt
.dataStatic(data
);
609 if (req
->isLocalAccess())
610 dcache_latency
+= req
->localAccessor(thread
->getTC(), &pkt
);
612 dcache_latency
+= sendPacket(dcachePort
, &pkt
);
615 dcache_access
= true;
617 assert(!pkt
.isError());
618 assert(!req
->isLLSC());
621 if (fault
!= NoFault
&& req
->isPrefetch()) {
625 //If there's a fault and we're not doing prefetch, return it
// Main execution loop. Each tick executes up to `width` instructions (or
// keeps iterating while `locked`): fetch (with itb translation and an
// atomic icache access), execute, then accumulate icache/dcache latencies
// into the stall time and reschedule the next tick.
// NOTE(review): many lines are elided in this extract (thread-switch call,
// fetch guard bodies, stat updates, braces, latency initialization) — the
// code below is fragmentary.
630 AtomicSimpleCPU::tick()
632 DPRINTF(SimpleCPU
, "Tick\n");
634 // Change thread if multi-threaded
637 // Set memroy request ids to current thread
638 if (numThreads
> 1) {
639 ContextID cid
= threadContexts
[curThread
]->contextId();
641 ifetch_req
->setContext(cid
);
642 data_read_req
->setContext(cid
);
643 data_write_req
->setContext(cid
);
644 data_amo_req
->setContext(cid
);
647 SimpleExecContext
& t_info
= *threadInfo
[curThread
];
648 SimpleThread
* thread
= t_info
.thread
;
// Execute up to `width` instructions per tick; `locked` extends the loop
// so a locked RMW sequence completes within one tick.
652 for (int i
= 0; i
< width
|| locked
; ++i
) {
654 updateCycleCounters(BaseCPU::CPU_STATE_ON
);
// Interrupts are deferred while an instruction delays its commit.
656 if (!curStaticInst
|| !curStaticInst
->isDelayedCommit()) {
657 checkForInterrupts();
661 // We must have just got suspended by a PC event
662 if (_status
== Idle
) {
667 Fault fault
= NoFault
;
669 TheISA::PCState pcState
= thread
->pcState();
// ROM-resident microcode needs no instruction fetch from memory.
671 bool needToFetch
= !isRomMicroPC(pcState
.microPC()) &&
674 ifetch_req
->taskId(taskId());
675 setupFetchRequest(ifetch_req
);
// Translate the fetch address through the instruction TLB.
676 fault
= thread
->itb
->translateAtomic(ifetch_req
, thread
->getTC(),
680 if (fault
== NoFault
) {
681 Tick icache_latency
= 0;
682 bool icache_access
= false;
683 dcache_access
= false; // assume no dcache access
686 // This is commented out because the decoder would act like
687 // a tiny cache otherwise. It wouldn't be flushed when needed
688 // like the I cache. It should be flushed, and when that works
689 // this code should be uncommented.
690 //Fetch more instruction memory if necessary
691 //if (decoder.needMoreBytes())
693 icache_access
= true;
694 Packet ifetch_pkt
= Packet(ifetch_req
, MemCmd::ReadReq
);
695 ifetch_pkt
.dataStatic(&inst
);
697 icache_latency
= sendPacket(icachePort
, &ifetch_pkt
);
699 assert(!ifetch_pkt
.isError());
701 // ifetch_req is initialized to read the instruction directly
702 // into the CPU object's inst field.
708 Tick stall_ticks
= 0;
// Execute the decoded instruction against this thread's context.
710 fault
= curStaticInst
->execute(&t_info
, traceData
);
712 // keep an instruction count
713 if (fault
== NoFault
) {
// Fire the Commit probe for listeners (e.g. SimPoint profiling).
715 ppCommit
->notify(std::make_pair(thread
, curStaticInst
));
716 } else if (traceData
) {
720 if (fault
!= NoFault
&&
721 dynamic_pointer_cast
<SyscallRetryFault
>(fault
)) {
722 // Retry execution of system calls after a delay.
723 // Prevents immediate re-execution since conditions which
724 // caused the retry are unlikely to change every tick.
725 stall_ticks
+= clockEdge(syscallRetryLatency
) - curTick();
731 // @todo remove me after debugging with legion done
732 if (curStaticInst
&& (!curStaticInst
->isMicroop() ||
733 curStaticInst
->isFirstMicroop()))
// Optionally charge the modelled cache latencies as stall time.
736 if (simulate_inst_stalls
&& icache_access
)
737 stall_ticks
+= icache_latency
;
739 if (simulate_data_stalls
&& dcache_access
)
740 stall_ticks
+= dcache_latency
;
743 // the atomic cpu does its accounting in ticks, so
744 // keep counting in ticks but round to the clock
746 latency
+= divCeil(stall_ticks
, clockPeriod()) *
751 if (fault
!= NoFault
|| !t_info
.stayAtPC
)
755 if (tryCompleteDrain())
758 // instruction takes at least one cycle
759 if (latency
< clockPeriod())
760 latency
= clockPeriod();
763 reschedule(tickEvent
, curTick() + latency
, true);
767 AtomicSimpleCPU::regProbePoints()
769 BaseCPU::regProbePoints();
771 ppCommit
= new ProbePointArg
<pair
<SimpleThread
*, const StaticInstPtr
>>
772 (getProbeManager(), "Commit");
776 AtomicSimpleCPU::printAddr(Addr a
)
778 dcachePort
.printAddr(a
);
781 ////////////////////////////////////////////////////////////////////////
783 // AtomicSimpleCPU Simulation Object
786 AtomicSimpleCPUParams::create()
788 return new AtomicSimpleCPU(this);