2 * Copyright (c) 2002-2005 The Regents of The University of Michigan
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 * Authors: Steve Reinhardt
31 #include "arch/locked_mem.hh"
32 #include "arch/mmaped_ipr.hh"
33 #include "arch/utility.hh"
34 #include "base/bigint.hh"
35 #include "cpu/exetrace.hh"
36 #include "cpu/simple/atomic.hh"
37 #include "mem/packet.hh"
38 #include "mem/packet_access.hh"
39 #include "params/AtomicSimpleCPU.hh"
40 #include "sim/system.hh"
// Bring the ISA-specific namespace into scope for the unqualified
// ISA helpers used below (initCPU, handleLockedRead/Write, ...).
43 using namespace TheISA
;
// TickEvent constructor: registers this event on the main event queue at
// CPU-tick priority and remembers the owning CPU so process() can call back.
45 AtomicSimpleCPU::TickEvent::TickEvent(AtomicSimpleCPU
*c
)
46 : Event(&mainEventQueue
, CPU_Tick_Pri
), cpu(c
)
// Event callback fired each scheduled tick.
// NOTE(review): the body (original lines 53-56) is missing from this
// extraction; presumably it invokes cpu->tick() — confirm against the repo.
52 AtomicSimpleCPU::TickEvent::process()
// Human-readable event name used by event-queue tracing/debug output.
58 AtomicSimpleCPU::TickEvent::description() const
60 return "AtomicSimpleCPU tick";
// Look up one of this CPU's ports by name ("dcache_port", "icache_port",
// or "physmem_port"); any other name is a fatal configuration error.
// Requesting "physmem_port" also latches hasPhysMemPort so later accesses
// can bypass the cache ports for physical-memory addresses.
// NOTE(review): the return statements (original lines 67/69/72) are missing
// from this extraction.
64 AtomicSimpleCPU::getPort(const std::string
&if_name
, int idx
)
66 if (if_name
== "dcache_port")
68 else if (if_name
== "icache_port")
70 else if (if_name
== "physmem_port") {
71 hasPhysMemPort
= true;
75 panic("No Such Port\n");
// One-time initialization after construction/port binding:
//  - caches the CPU id from the thread context,
//  - runs ISA-level CPU init (including PC) for every thread context,
//  - if a physmem port is connected, records its single address range so
//    accesses to it can be short-circuited,
//  - stamps the statically allocated fetch/read/write requests with the
//    CPU id (thread id hard-wired to 0 until MT support exists).
// NOTE(review): several lines are missing from this extraction (original
// 80-83, 86, 89-92, 96) — e.g. the declaration of `snoop` used at line 94
// and any guards around the physmem block; verify against the repo.
79 AtomicSimpleCPU::init()
82 cpuId
= tc
->readCpuId();
84 for (int i
= 0; i
< threadContexts
.size(); ++i
) {
85 ThreadContext
*tc
= threadContexts
[i
];
87 // initialize CPU, including PC
88 TheISA::initCPU(tc
, cpuId
);
93 AddrRangeList pmAddrList
;
94 physmemPort
.getPeerAddressRanges(pmAddrList
, snoop
);
95 physMemAddr
= *pmAddrList
.begin();
97 ifetch_req
.setThreadContext(cpuId
, 0); // Add thread ID if we add MT
98 data_read_req
.setThreadContext(cpuId
, 0); // Add thread ID here too
99 data_write_req
.setThreadContext(cpuId
, 0); // Add thread ID here too
// Timing-mode requests are impossible on an atomic CPU; any arrival is a
// configuration bug, so die loudly.
103 AtomicSimpleCPU::CpuPort::recvTiming(PacketPtr pkt
)
105 panic("AtomicSimpleCPU doesn't expect recvTiming callback!");
// Atomic snoop hook: this CPU has no caches to snoop, so the request is
// answered immediately.
// NOTE(review): the return statement (original line 113) is missing from
// this extraction.
110 AtomicSimpleCPU::CpuPort::recvAtomic(PacketPtr pkt
)
112 //Snooping a coherence request, just return
// Functional snoop hook: the CPU holds no cached state, so there is
// nothing to update.
117 AtomicSimpleCPU::CpuPort::recvFunctional(PacketPtr pkt
)
119 //No internal storage to update, just return
// Status-change hook. Only RangeChange is tolerated: the first one is
// echoed back (guarded by snoopRangeSent so peers are notified exactly
// once); any other status is unexpected and fatal.
124 AtomicSimpleCPU::CpuPort::recvStatusChange(Status status
)
126 if (status
== RangeChange
) {
127 if (!snoopRangeSent
) {
128 snoopRangeSent
= true;
129 sendStatusChange(Port::RangeChange
);
134 panic("AtomicSimpleCPU doesn't expect recvStatusChange callback!");
// Retries only occur in timing mode, which this CPU never uses — fatal.
138 AtomicSimpleCPU::CpuPort::recvRetry()
140 panic("AtomicSimpleCPU doesn't expect recvRetry callback!");
// When the dcache port is (re)bound, propagate the change to the thread
// context so its functional/virtual memory ports are rebuilt.
// NOTE(review): intermediate lines (original 145-148, 150) are missing from
// this extraction — likely the Port::setPeer() forwarding call and a guard.
144 AtomicSimpleCPU::DcachePort::setPeer(Port
*port
)
149 // Update the ThreadContext's memory ports (Functional/Virtual
151 cpu
->tcBase()->connectMemPorts();
// Constructor: copies configuration (issue width, stall-simulation flags)
// from the params object, builds the three ports, and clears the
// snoop-range-sent latches so the first RangeChange gets forwarded.
// NOTE(review): icachePort, dcachePort AND physmemPort are all named with
// the "-iport" suffix — almost certainly a copy-paste slip (expected
// distinct suffixes such as "-dport"/"-pmport"); confirm before fixing,
// since port names may be referenced by configuration scripts.
155 AtomicSimpleCPU::AtomicSimpleCPU(Params
*p
)
156 : BaseSimpleCPU(p
), tickEvent(this), width(p
->width
),
157 simulate_data_stalls(p
->simulate_data_stalls
),
158 simulate_inst_stalls(p
->simulate_inst_stalls
),
159 icachePort(name() + "-iport", this), dcachePort(name() + "-iport", this),
160 physmemPort(name() + "-iport", this), hasPhysMemPort(false)
164 icachePort
.snoopRangeSent
= false;
165 dcachePort
.snoopRangeSent
= false;
// Destructor. Body (original lines 171-172) is not visible in this
// extraction; nothing here shows any owned resources needing release.
170 AtomicSimpleCPU::~AtomicSimpleCPU()
// Checkpoint this CPU: the SimObject drain state, the CPU status enum,
// the BaseSimpleCPU state, and finally the tick event under its own
// "<name>.tickEvent" section (via nameOut) so unserialize() can find it.
175 AtomicSimpleCPU::serialize(ostream
&os
)
177 SimObject::State so_state
= SimObject::getState();
178 SERIALIZE_ENUM(so_state
);
179 Status _status
= status();
180 SERIALIZE_ENUM(_status
);
181 BaseSimpleCPU::serialize(os
);
182 nameOut(os
, csprintf("%s.tickEvent", name()));
183 tickEvent
.serialize(os
);
// Restore this CPU from a checkpoint, mirroring serialize(): read back the
// SimObject drain state (into a scratch local — only consumed by the enum
// macro), the CPU status, the BaseSimpleCPU state, and the tick event from
// its "<section>.tickEvent" subsection.
// Fix: the second parameter had been mojibake'd to "§ion" by an HTML-entity
// mangling of "&sect" — restored to "&section" to match its use below
// (original line 192).
187 AtomicSimpleCPU::unserialize(Checkpoint
*cp
, const string
&section
)
189 SimObject::State so_state
;
190 UNSERIALIZE_ENUM(so_state
);
191 UNSERIALIZE_ENUM(_status
);
192 BaseSimpleCPU::unserialize(cp
, section
);
193 tickEvent
.unserialize(cp
, csprintf("%s.tickEvent", section
));
// Resume after a drain/checkpoint: only meaningful when Idle or SwitchedOut.
// Asserts the system is in atomic memory mode (this model cannot run in
// timing mode), moves to Running, and reschedules the tick event for an
// active thread if it is not already scheduled.
// NOTE(review): lines between the guard and the DPRINTF (original 200-201)
// and the closing braces are missing from this extraction.
197 AtomicSimpleCPU::resume()
199 if (_status
== Idle
|| _status
== SwitchedOut
)
202 DPRINTF(SimpleCPU
, "Resume\n");
203 assert(system
->getMemoryMode() == Enums::atomic
);
205 changeState(SimObject::Running
);
206 if (thread
->status() == ThreadContext::Active
) {
207 if (!tickEvent
.scheduled()) {
208 tickEvent
.schedule(nextCycle());
// Take this CPU out of service (e.g. before handing state to another CPU
// model): legal only from Running/Idle, then mark it SwitchedOut.
214 AtomicSimpleCPU::switchOut()
216 assert(status() == Running
|| status() == Idle
);
217 _status
= SwitchedOut
;
// Adopt architectural state from another CPU model: base class moves the
// thread contexts over onto our cache ports, then any active context makes
// this CPU Running and schedules its tick event (the tick event must not
// already be scheduled). Afterwards the per-request CPU id stamps are
// refreshed — note the single-threaded assumption is asserted explicitly.
// NOTE(review): several lines are missing from this extraction (original
// 235, 237-239, 241-242, 248-249) — including the `_status = Running`
// assignment implied by the surrounding logic and the declaration of the
// `tc` used at line 244; verify against the repo.
224 AtomicSimpleCPU::takeOverFrom(BaseCPU
*oldCPU
)
226 BaseCPU::takeOverFrom(oldCPU
, &icachePort
, &dcachePort
);
228 assert(!tickEvent
.scheduled());
230 // if any of this CPU's ThreadContexts are active, mark the CPU as
231 // running and schedule its tick event.
232 for (int i
= 0; i
< threadContexts
.size(); ++i
) {
233 ThreadContext
*tc
= threadContexts
[i
];
234 if (tc
->status() == ThreadContext::Active
&& _status
!= Running
) {
236 tickEvent
.schedule(nextCycle());
240 if (_status
!= Running
) {
243 assert(threadContexts
.size() == 1);
244 cpuId
= tc
->readCpuId();
245 ifetch_req
.setThreadContext(cpuId
, 0); // Add thread ID if we add MT
246 data_read_req
.setThreadContext(cpuId
, 0); // Add thread ID here too
247 data_write_req
.setThreadContext(cpuId
, 0); // Add thread ID here too
// Wake thread 0 after `delay` cycles: credits the idle period
// (lastActivate - lastSuspend) to the cycle counter, then schedules the
// tick event on a cycle boundary (nextCycle aligns curTick + delay).
// Only valid from Idle with no tick event pending; thread_num must be 0
// (single-threaded model).
252 AtomicSimpleCPU::activateContext(int thread_num
, int delay
)
254 DPRINTF(SimpleCPU
, "ActivateContext %d (%d cycles)\n", thread_num
, delay
);
256 assert(thread_num
== 0);
259 assert(_status
== Idle
);
260 assert(!tickEvent
.scheduled());
263 numCycles
+= tickToCycles(thread
->lastActivate
- thread
->lastSuspend
);
265 //Make sure ticks are still on multiples of cycles
266 tickEvent
.schedule(nextCycle(curTick
+ ticks(delay
)));
// Put thread 0 to sleep: deschedule the pending tick event if there is one
// (there may not be — e.g. when called from inside an instruction such as
// "quiesce"). Only valid while Running; thread_num must be 0.
// NOTE(review): the trailing status update (original lines 285+) is missing
// from this extraction.
272 AtomicSimpleCPU::suspendContext(int thread_num
)
274 DPRINTF(SimpleCPU
, "SuspendContext %d\n", thread_num
);
276 assert(thread_num
== 0);
279 assert(_status
== Running
);
281 // tick event may not be scheduled if this gets called from inside
282 // an instruction's execution, e.g. "quiesce"
283 if (tickEvent
.scheduled())
284 tickEvent
.deschedule();
// Templated atomic load of a T from virtual address `addr` into `data`.
// Handles accesses that straddle a cache-line boundary by splitting into
// two sub-accesses (secondAddr is the start of the second line; dataSize is
// clipped to the first line on the first pass). Each sub-access: build the
// request, translate, then dispatch to the IPR handler (mmaped IPR), the
// physmem port fast path (address in physMemAddr range), or the dcache
// port, accumulating latency in dcache_latency. LL semantics are recorded
// via handleLockedRead. Returns the translation fault, if any.
// NOTE(review): the loop construct, early returns, and second-pass pointer/
// address adjustments (e.g. original lines 354, 357-359, 361-370) are
// missing from this extraction — the trailing comments reference statements
// that are not visible here.
293 AtomicSimpleCPU::read(Addr addr
, T
&data
, unsigned flags
)
295 // use the CPU's statically allocated read request and packet objects
296 Request
*req
= &data_read_req
;
299 traceData
->setAddr(addr
);
302 //The block size of our peer.
303 int blockSize
= dcachePort
.peerBlockSize();
304 //The size of the data we're trying to read.
305 int dataSize
= sizeof(T
);
307 uint8_t * dataPtr
= (uint8_t *)&data
;
309 //The address of the second part of this access if it needs to be split
310 //across a cache line boundary.
311 Addr secondAddr
= roundDown(addr
+ dataSize
- 1, blockSize
);
313 if(secondAddr
> addr
)
314 dataSize
= secondAddr
- addr
;
319 req
->setVirt(0, addr
, dataSize
, flags
, thread
->readPC());
321 // translate to physical address
322 Fault fault
= thread
->translateDataReadReq(req
);
324 // Now do the access.
325 if (fault
== NoFault
) {
326 Packet pkt
= Packet(req
,
327 req
->isLocked() ? MemCmd::LoadLockedReq
: MemCmd::ReadReq
,
329 pkt
.dataStatic(dataPtr
);
331 if (req
->isMmapedIpr())
332 dcache_latency
+= TheISA::handleIprRead(thread
->getTC(), &pkt
);
334 if (hasPhysMemPort
&& pkt
.getAddr() == physMemAddr
)
335 dcache_latency
+= physmemPort
.sendAtomic(&pkt
);
337 dcache_latency
+= dcachePort
.sendAtomic(&pkt
);
339 dcache_access
= true;
341 assert(!pkt
.isError());
343 if (req
->isLocked()) {
344 TheISA::handleLockedRead(thread
, req
);
348 // This will need a new way to tell if it has a dcache attached.
349 if (req
->isUncacheable())
350 recordEvent("Uncached Read");
352 //If there's a fault, return it
353 if (fault
!= NoFault
)
355 //If we don't need to access a second cache line, stop now.
356 if (secondAddr
<= addr
)
360 traceData
->setData(data
);
366 * Set up for accessing the second cache line.
369 //Move the pointer we're reading into to the correct location.
371 //Adjust the size to get the remaining bytes.
372 dataSize
= addr
+ sizeof(T
) - secondAddr
;
373 //And access the right address.
// Translate a virtual data-read address to physical without performing the
// access. Mirrors read<T>()'s line-splitting structure so a straddling
// access is translated in two passes, but only the FIRST pass's physical
// address is written to `paddr` (guarded by firstTimeThrough). Returns the
// translation fault, if any.
// NOTE(review): the declaration/initialization of `dataSize` (original
// lines 392-393) and the loop/return scaffolding are missing from this
// extraction; as visible, `dataSize` at line 398 would be uninitialized —
// verify against the repo before changing anything.
379 AtomicSimpleCPU::translateDataReadAddr(Addr vaddr
, Addr
& paddr
,
380 int size
, unsigned flags
)
382 // use the CPU's statically allocated read request and packet objects
383 Request
*req
= &data_read_req
;
386 traceData
->setAddr(vaddr
);
389 //The block size of our peer.
390 int blockSize
= dcachePort
.peerBlockSize();
391 //The size of the data we're trying to read.
394 bool firstTimeThrough
= true;
396 //The address of the second part of this access if it needs to be split
397 //across a cache line boundary.
398 Addr secondAddr
= roundDown(vaddr
+ dataSize
- 1, blockSize
);
400 if(secondAddr
> vaddr
)
401 dataSize
= secondAddr
- vaddr
;
404 req
->setVirt(0, vaddr
, dataSize
, flags
, thread
->readPC());
406 // translate to physical address
407 Fault fault
= thread
->translateDataReadReq(req
);
409 //If there's a fault, return it
410 if (fault
!= NoFault
)
413 if (firstTimeThrough
) {
414 paddr
= req
->getPaddr();
415 firstTimeThrough
= false;
418 //If we don't need to access a second cache line, stop now.
419 if (secondAddr
<= vaddr
)
423 * Set up for accessing the second cache line.
426 //Adjust the size to get the remaining bytes.
427 dataSize
= vaddr
+ size
- secondAddr
;
428 //And access the right address.
// Explicit instantiations of read<T>() for every access width the ISAs
// use, hidden from Doxygen. (The `template Fault` prefixes on the
// originals are among the lines dropped by this extraction.)
433 #ifndef DOXYGEN_SHOULD_SKIP_THIS
437 AtomicSimpleCPU::read(Addr addr
, Twin32_t
&data
, unsigned flags
);
441 AtomicSimpleCPU::read(Addr addr
, Twin64_t
&data
, unsigned flags
);
445 AtomicSimpleCPU::read(Addr addr
, uint64_t &data
, unsigned flags
);
449 AtomicSimpleCPU::read(Addr addr
, uint32_t &data
, unsigned flags
);
453 AtomicSimpleCPU::read(Addr addr
, uint16_t &data
, unsigned flags
);
457 AtomicSimpleCPU::read(Addr addr
, uint8_t &data
, unsigned flags
);
459 #endif //DOXYGEN_SHOULD_SKIP_THIS
// double overload: reinterpret the 64-bit storage as uint64_t and delegate.
// NOTE(review): pointer-cast type punning is technically UB in ISO C++;
// the whole file relies on it, so leave consistent unless changed globally.
463 AtomicSimpleCPU::read(Addr addr
, double &data
, unsigned flags
)
465 return read(addr
, *(uint64_t*)&data
, flags
);
// float overload: reinterpret the 32-bit storage as uint32_t and delegate.
470 AtomicSimpleCPU::read(Addr addr
, float &data
, unsigned flags
)
472 return read(addr
, *(uint32_t*)&data
, flags
);
// int32_t overload: same bits as uint32_t, so delegate via reference cast.
478 AtomicSimpleCPU::read(Addr addr
, int32_t &data
, unsigned flags
)
480 return read(addr
, (uint32_t&)data
, flags
);
// Templated atomic store of T `data` to virtual address `addr`, with the
// same line-splitting scheme as read<T>(). Command selection: WriteReq by
// default; StoreCondReq for locked (SC) stores — handleLockedWrite decides
// via do_access whether the store actually reaches memory; SwapReq for
// swaps, with the comparand stashed in extra data for conditional swaps.
// On completion of a non-swap access the SC result is read back into *res.
// Dispatch order per sub-access: mmaped IPR handler, physmem fast path, or
// dcache port, accumulating dcache_latency. Returns the translation fault.
// NOTE(review): the enclosing loop, the do_access guard around the send,
// early returns, and second-pass pointer/address adjustments (original
// lines 508-511, 528, 530-533, 552-558, 569-586, 590+) are missing from
// this extraction; the XXX comment at 541-543 refers to a loop that is not
// visible here.
486 AtomicSimpleCPU::write(T data
, Addr addr
, unsigned flags
, uint64_t *res
)
488 // use the CPU's statically allocated write request and packet objects
489 Request
*req
= &data_write_req
;
492 traceData
->setAddr(addr
);
495 //The block size of our peer.
496 int blockSize
= dcachePort
.peerBlockSize();
497 //The size of the data we're trying to read.
498 int dataSize
= sizeof(T
);
500 uint8_t * dataPtr
= (uint8_t *)&data
;
502 //The address of the second part of this access if it needs to be split
503 //across a cache line boundary.
504 Addr secondAddr
= roundDown(addr
+ dataSize
- 1, blockSize
);
506 if(secondAddr
> addr
)
507 dataSize
= secondAddr
- addr
;
512 req
->setVirt(0, addr
, dataSize
, flags
, thread
->readPC());
514 // translate to physical address
515 Fault fault
= thread
->translateDataWriteReq(req
);
517 // Now do the access.
518 if (fault
== NoFault
) {
519 MemCmd cmd
= MemCmd::WriteReq
; // default
520 bool do_access
= true; // flag to suppress cache access
522 if (req
->isLocked()) {
523 cmd
= MemCmd::StoreCondReq
;
524 do_access
= TheISA::handleLockedWrite(thread
, req
);
525 } else if (req
->isSwap()) {
526 cmd
= MemCmd::SwapReq
;
527 if (req
->isCondSwap()) {
529 req
->setExtraData(*res
);
534 Packet pkt
= Packet(req
, cmd
, Packet::Broadcast
);
535 pkt
.dataStatic(dataPtr
);
537 if (req
->isMmapedIpr()) {
539 TheISA::handleIprWrite(thread
->getTC(), &pkt
);
541 //XXX This needs to be outside of the loop in order to
542 //work properly for cache line boundary crossing
543 //accesses in transendian simulations.
545 if (hasPhysMemPort
&& pkt
.getAddr() == physMemAddr
)
546 dcache_latency
+= physmemPort
.sendAtomic(&pkt
);
548 dcache_latency
+= dcachePort
.sendAtomic(&pkt
);
550 dcache_access
= true;
551 assert(!pkt
.isError());
559 if (res
&& !req
->isSwap()) {
560 *res
= req
->getExtraData();
564 // This will need a new way to tell if it's hooked up to a cache or not.
565 if (req
->isUncacheable())
566 recordEvent("Uncached Write");
568 //If there's a fault or we don't need to access a second cache line,
570 if (fault
!= NoFault
|| secondAddr
<= addr
)
572 // If the write needs to have a fault on the access, consider
573 // calling changeStatus() and changing it to "bad addr write"
576 traceData
->setData(data
);
582 * Set up for accessing the second cache line.
585 //Move the pointer we're reading into to the correct location.
587 //Adjust the size to get the remaining bytes.
588 dataSize
= addr
+ sizeof(T
) - secondAddr
;
589 //And access the right address.
// Translate a virtual data-write address to physical without performing
// the store; structure parallels translateDataReadAddr() — two translation
// passes for a line-straddling range, with `paddr` set only on the first
// pass. Returns the translation fault, if any.
// NOTE(review): the declaration/initialization of `dataSize` (original
// lines 613-614) and the loop/return scaffolding are missing from this
// extraction; verify against the repo.
595 AtomicSimpleCPU::translateDataWriteAddr(Addr vaddr
, Addr
&paddr
,
596 int size
, unsigned flags
)
598 // use the CPU's statically allocated write request and packet objects
599 Request
*req
= &data_write_req
;
602 traceData
->setAddr(vaddr
);
605 //The block size of our peer.
606 int blockSize
= dcachePort
.peerBlockSize();
608 //The address of the second part of this access if it needs to be split
609 //across a cache line boundary.
610 Addr secondAddr
= roundDown(vaddr
+ size
- 1, blockSize
);
612 //The size of the data we're trying to read.
615 bool firstTimeThrough
= true;
617 if(secondAddr
> vaddr
)
618 dataSize
= secondAddr
- vaddr
;
623 req
->setVirt(0, vaddr
, dataSize
, flags
, thread
->readPC());
625 // translate to physical address
626 Fault fault
= thread
->translateDataWriteReq(req
);
628 //If there's a fault or we don't need to access a second cache line,
630 if (fault
!= NoFault
)
633 if (firstTimeThrough
) {
634 paddr
= req
->getPaddr();
635 firstTimeThrough
= false;
638 if (secondAddr
<= vaddr
)
642 * Set up for accessing the second cache line.
645 //Adjust the size to get the remaining bytes.
646 dataSize
= vaddr
+ size
- secondAddr
;
647 //And access the right address.
// Explicit instantiations of write<T>() for every access width the ISAs
// use, hidden from Doxygen. (The `template Fault` prefixes on the
// originals are among the lines dropped by this extraction.)
653 #ifndef DOXYGEN_SHOULD_SKIP_THIS
657 AtomicSimpleCPU::write(Twin32_t data
, Addr addr
,
658 unsigned flags
, uint64_t *res
);
662 AtomicSimpleCPU::write(Twin64_t data
, Addr addr
,
663 unsigned flags
, uint64_t *res
);
667 AtomicSimpleCPU::write(uint64_t data
, Addr addr
,
668 unsigned flags
, uint64_t *res
);
672 AtomicSimpleCPU::write(uint32_t data
, Addr addr
,
673 unsigned flags
, uint64_t *res
);
677 AtomicSimpleCPU::write(uint16_t data
, Addr addr
,
678 unsigned flags
, uint64_t *res
);
682 AtomicSimpleCPU::write(uint8_t data
, Addr addr
,
683 unsigned flags
, uint64_t *res
);
685 #endif //DOXYGEN_SHOULD_SKIP_THIS
// double overload: reinterpret the 64-bit storage as uint64_t and delegate.
689 AtomicSimpleCPU::write(double data
, Addr addr
, unsigned flags
, uint64_t *res
)
691 return write(*(uint64_t*)&data
, addr
, flags
, res
);
// float overload: reinterpret the 32-bit storage as uint32_t and delegate.
696 AtomicSimpleCPU::write(float data
, Addr addr
, unsigned flags
, uint64_t *res
)
698 return write(*(uint32_t*)&data
, addr
, flags
, res
);
// int32_t overload: value-cast to uint32_t (same bit pattern) and delegate.
704 AtomicSimpleCPU::write(int32_t data
, Addr addr
, unsigned flags
, uint64_t *res
)
706 return write((uint32_t)data
, addr
, flags
, res
);
// Main simulation loop body: executes up to `width` instructions per tick.
// Per iteration: check for interrupts (skipped mid-macroop, i.e. when the
// current static inst is delayed-commit), set up the fetch request, and on
// successful translation fetch the instruction — via the physmem fast path
// when the fetch address lies in physMemAddr, else via the icache port —
// directly into the CPU's `inst` field, then execute it. Stall modelling:
// icache/dcache latencies are accumulated into stall_ticks when the
// corresponding simulate_*_stalls flag is set, rounded UP to a whole cycle
// (aligned_stall_ticks), and added to this tick's latency. Finally the
// tick event is rescheduled at curTick + latency (clamped below to at
// least one cycle) unless the CPU went idle.
// NOTE(review): many scaffolding lines are missing from this extraction
// (original 712, 714-716, 718-719, 722-726, 734, 737, 739, 742, 744,
// 746-753, 755, 758, 761-767, 771-772, 786, 788-794, 797-799) — including
// the preExecute/postExecute calls, instruction counting, and the
// advancePC/idle handling around line 791; treat the flow description
// above as reconstructed context and verify against the repo.
711 AtomicSimpleCPU::tick()
713 DPRINTF(SimpleCPU
, "Tick\n");
717 for (int i
= 0; i
< width
; ++i
) {
720 if (!curStaticInst
|| !curStaticInst
->isDelayedCommit())
721 checkForInterrupts();
725 Fault fault
= setupFetchRequest(&ifetch_req
);
727 if (fault
== NoFault
) {
728 Tick icache_latency
= 0;
729 bool icache_access
= false;
730 dcache_access
= false; // assume no dcache access
732 //Fetch more instruction memory if necessary
733 //if(predecoder.needMoreBytes())
735 icache_access
= true;
736 Packet ifetch_pkt
= Packet(&ifetch_req
, MemCmd::ReadReq
,
738 ifetch_pkt
.dataStatic(&inst
);
740 if (hasPhysMemPort
&& ifetch_pkt
.getAddr() == physMemAddr
)
741 icache_latency
= physmemPort
.sendAtomic(&ifetch_pkt
);
743 icache_latency
= icachePort
.sendAtomic(&ifetch_pkt
);
745 assert(!ifetch_pkt
.isError());
747 // ifetch_req is initialized to read the instruction directly
748 // into the CPU object's inst field.
754 fault
= curStaticInst
->execute(this, traceData
);
756 // keep an instruction count
757 if (fault
== NoFault
)
759 else if (traceData
) {
760 // If there was a fault, we should trace this instruction.
768 // @todo remove me after debugging with legion done
769 if (curStaticInst
&& (!curStaticInst
->isMicroop() ||
770 curStaticInst
->isFirstMicroop()))
773 Tick stall_ticks
= 0;
774 if (simulate_inst_stalls
&& icache_access
)
775 stall_ticks
+= icache_latency
;
777 if (simulate_data_stalls
&& dcache_access
)
778 stall_ticks
+= dcache_latency
;
781 Tick stall_cycles
= stall_ticks
/ ticks(1);
782 Tick aligned_stall_ticks
= ticks(stall_cycles
);
784 if (aligned_stall_ticks
< stall_ticks
)
785 aligned_stall_ticks
+= 1;
787 latency
+= aligned_stall_ticks
;
791 if(fault
!= NoFault
|| !stayAtPC
)
795 // instruction takes at least one cycle
796 if (latency
< ticks(1))
800 tickEvent
.schedule(curTick
+ latency
);
// Debug aid: forward an address to the dcache port's printAddr so the
// memory-system object that owns it can be identified.
805 AtomicSimpleCPU::printAddr(Addr a
)
807 dcachePort
.printAddr(a
);
811 ////////////////////////////////////////////////////////////////////////
813 // AtomicSimpleCPU Simulation Object
816 AtomicSimpleCPUParams::create()
818 AtomicSimpleCPU::Params
*params
= new AtomicSimpleCPU::Params();
820 params
->numberOfThreads
= 1;
821 params
->max_insts_any_thread
= max_insts_any_thread
;
822 params
->max_insts_all_threads
= max_insts_all_threads
;
823 params
->max_loads_any_thread
= max_loads_any_thread
;
824 params
->max_loads_all_threads
= max_loads_all_threads
;
825 params
->progress_interval
= progress_interval
;
826 params
->deferRegistration
= defer_registration
;
827 params
->phase
= phase
;
828 params
->clock
= clock
;
829 params
->functionTrace
= function_trace
;
830 params
->functionTraceStart
= function_trace_start
;
831 params
->width
= width
;
832 params
->simulate_data_stalls
= simulate_data_stalls
;
833 params
->simulate_inst_stalls
= simulate_inst_stalls
;
834 params
->system
= system
;
835 params
->cpu_id
= cpu_id
;
836 params
->tracer
= tracer
;
841 params
->profile
= profile
;
842 params
->do_quiesce
= do_quiesce
;
843 params
->do_checkpoint_insts
= do_checkpoint_insts
;
844 params
->do_statistics_insts
= do_statistics_insts
;
846 if (workload
.size() != 1)
847 panic("only one workload allowed");
848 params
->process
= workload
[0];
851 AtomicSimpleCPU
*cpu
= new AtomicSimpleCPU(params
);