src/cpu/simple/atomic.cc

   1 /*
   2  * Copyright (c) 2002-2005 The Regents of The University of Michigan
   3  * All rights reserved.
   4  *
   5  * Redistribution and use in source and binary forms, with or without
   6  * modification, are permitted provided that the following conditions are
   7  * met: redistributions of source code must retain the above copyright
   8  * notice, this list of conditions and the following disclaimer;
   9  * redistributions in binary form must reproduce the above copyright
  10  * notice, this list of conditions and the following disclaimer in the
  11  * documentation and/or other materials provided with the distribution;
  12  * neither the name of the copyright holders nor the names of its
  13  * contributors may be used to endorse or promote products derived from
  14  * this software without specific prior written permission.
  15  *
  16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  17  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  18  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  19  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  20  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  21  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  22  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  26  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  27  *
  28  * Authors: Steve Reinhardt
  29  */
  30
  31 #include "arch/locked_mem.hh"
  32 #include "arch/mmapped_ipr.hh"
  33 #include "arch/utility.hh"
  34 #include "base/bigint.hh"
  35 #include "config/the_isa.hh"
  36 #include "cpu/simple/atomic.hh"
  37 #include "cpu/exetrace.hh"
  38 #include "debug/ExecFaulting.hh"
  39 #include "debug/SimpleCPU.hh"
  40 #include "mem/packet.hh"
  41 #include "mem/packet_access.hh"
  42 #include "params/AtomicSimpleCPU.hh"
  43 #include "sim/faults.hh"
  44 #include "sim/system.hh"
  45 #include "sim/full_system.hh"
  46
  47 using namespace std;
  48 using namespace TheISA;
  49
  50 AtomicSimpleCPU::TickEvent::TickEvent(AtomicSimpleCPU *c)
  51     : Event(CPU_Tick_Pri), cpu(c)
  52 {
  53 }
  54
  55
  56 void
  57 AtomicSimpleCPU::TickEvent::process()
  58 {
  59     cpu->tick();
  60 }
  61
  62 const char *
  63 AtomicSimpleCPU::TickEvent::description() const
  64 {
  65     return "AtomicSimpleCPU tick";
  66 }
  67
  68 Port *
  69 AtomicSimpleCPU::getPort(const string &if_name, int idx)
  70 {
  71     if (if_name == "dcache_port")
  72         return &dcachePort;
  73     else if (if_name == "icache_port")
  74         return &icachePort;
  75     else if (if_name == "physmem_port") {
  76         hasPhysMemPort = true;
  77         return &physmemPort;
  78     }
  79     else
  80         panic("No Such Port\n");
  81 }
  82
  83 void
  84 AtomicSimpleCPU::init()
  85 {
  86     BaseCPU::init();
  87     if (FullSystem) {
  88         ThreadID size = threadContexts.size();
  89         for (ThreadID i = 0; i < size; ++i) {
  90             ThreadContext *tc = threadContexts[i];
  91             // initialize CPU, including PC
  92             TheISA::initCPU(tc, tc->contextId());
  93         }
  94     }
  95     if (hasPhysMemPort) {
  96         bool snoop = false;
  97         AddrRangeList pmAddrList;
  98         physmemPort.getPeerAddressRanges(pmAddrList, snoop);
  99         physMemAddr = *pmAddrList.begin();
 100     }
 101     // Atomic doesn't do MT right now, so contextId == threadId
 102     ifetch_req.setThreadContext(_cpuId, 0); // Add thread ID if we add MT
 103     data_read_req.setThreadContext(_cpuId, 0); // Add thread ID here too
 104     data_write_req.setThreadContext(_cpuId, 0); // Add thread ID here too
 105 }
 106
 107 bool
 108 AtomicSimpleCPU::CpuPort::recvTiming(PacketPtr pkt)
 109 {
 110     panic("AtomicSimpleCPU doesn't expect recvTiming callback!");
 111     return true;
 112 }
 113
 114 Tick
 115 AtomicSimpleCPU::CpuPort::recvAtomic(PacketPtr pkt)
 116 {
 117     //Snooping a coherence request, just return
 118     return 0;
 119 }
 120
 121 void
 122 AtomicSimpleCPU::CpuPort::recvFunctional(PacketPtr pkt)
 123 {
 124     //No internal storage to update, just return
 125     return;
 126 }
 127
 128 void
 129 AtomicSimpleCPU::CpuPort::recvStatusChange(Status status)
 130 {
 131     if (status == RangeChange) {
 132         if (!snoopRangeSent) {
 133             snoopRangeSent = true;
 134             sendStatusChange(Port::RangeChange);
 135         }
 136         return;
 137     }
 138
 139     panic("AtomicSimpleCPU doesn't expect recvStatusChange callback!");
 140 }
 141
 142 void
 143 AtomicSimpleCPU::CpuPort::recvRetry()
 144 {
 145     panic("AtomicSimpleCPU doesn't expect recvRetry callback!");
 146 }
 147
 148 void
 149 AtomicSimpleCPU::DcachePort::setPeer(Port *port)
 150 {
 151     Port::setPeer(port);
 152
 153     if (FullSystem) {
 154         // Update the ThreadContext's memory ports (Functional/Virtual
 155         // Ports)
 156         cpu->tcBase()->connectMemPorts(cpu->tcBase());
 157     }
 158 }
 159
 160 AtomicSimpleCPU::AtomicSimpleCPU(AtomicSimpleCPUParams *p)
 161     : BaseSimpleCPU(p), tickEvent(this), width(p->width), locked(false),
 162       simulate_data_stalls(p->simulate_data_stalls),
 163       simulate_inst_stalls(p->simulate_inst_stalls),
 164       icachePort(name() + "-iport", this), dcachePort(name() + "-iport", this),
 165       physmemPort(name() + "-iport", this), hasPhysMemPort(false)
 166 {
 167     _status = Idle;
 168
 169     icachePort.snoopRangeSent = false;
 170     dcachePort.snoopRangeSent = false;
 171
 172 }
 173
 174
 175 AtomicSimpleCPU::~AtomicSimpleCPU()
 176 {
 177     if (tickEvent.scheduled()) {
 178         deschedule(tickEvent);
 179     }
 180 }
 181
 182 void
 183 AtomicSimpleCPU::serialize(ostream &os)
 184 {
 185     SimObject::State so_state = SimObject::getState();
 186     SERIALIZE_ENUM(so_state);
 187     SERIALIZE_SCALAR(locked);
 188     BaseSimpleCPU::serialize(os);
 189     nameOut(os, csprintf("%s.tickEvent", name()));
 190     tickEvent.serialize(os);
 191 }
 192
 193 void
 194 AtomicSimpleCPU::unserialize(Checkpoint *cp, const string &section)
 195 {
 196     SimObject::State so_state;
 197     UNSERIALIZE_ENUM(so_state);
 198     UNSERIALIZE_SCALAR(locked);
 199     BaseSimpleCPU::unserialize(cp, section);
 200     tickEvent.unserialize(cp, csprintf("%s.tickEvent", section));
 201 }
 202
 203 void
 204 AtomicSimpleCPU::resume()
 205 {
 206     if (_status == Idle || _status == SwitchedOut)
 207         return;
 208
 209     DPRINTF(SimpleCPU, "Resume\n");
 210     assert(system->getMemoryMode() == Enums::atomic);
 211
 212     changeState(SimObject::Running);
 213     if (thread->status() == ThreadContext::Active) {
 214         if (!tickEvent.scheduled())
 215             schedule(tickEvent, nextCycle());
 216     }
 217     system->totalNumInsts = 0;
 218 }
 219
 220 void
 221 AtomicSimpleCPU::switchOut()
 222 {
 223     assert(_status == Running || _status == Idle);
 224     _status = SwitchedOut;
 225
 226     tickEvent.squash();
 227 }
 228
 229
 230 void
 231 AtomicSimpleCPU::takeOverFrom(BaseCPU *oldCPU)
 232 {
 233     BaseCPU::takeOverFrom(oldCPU, &icachePort, &dcachePort);
 234
 235     assert(!tickEvent.scheduled());
 236
 237     // if any of this CPU's ThreadContexts are active, mark the CPU as
 238     // running and schedule its tick event.
 239     ThreadID size = threadContexts.size();
 240     for (ThreadID i = 0; i < size; ++i) {
 241         ThreadContext *tc = threadContexts[i];
 242         if (tc->status() == ThreadContext::Active && _status != Running) {
 243             _status = Running;
 244             schedule(tickEvent, nextCycle());
 245             break;
 246         }
 247     }
 248     if (_status != Running) {
 249         _status = Idle;
 250     }
 251     assert(threadContexts.size() == 1);
 252     ifetch_req.setThreadContext(_cpuId, 0); // Add thread ID if we add MT
 253     data_read_req.setThreadContext(_cpuId, 0); // Add thread ID here too
 254     data_write_req.setThreadContext(_cpuId, 0); // Add thread ID here too
 255 }
 256
 257
 258 void
 259 AtomicSimpleCPU::activateContext(int thread_num, int delay)
 260 {
 261     DPRINTF(SimpleCPU, "ActivateContext %d (%d cycles)\n", thread_num, delay);
 262
 263     assert(thread_num == 0);
 264     assert(thread);
 265
 266     assert(_status == Idle);
 267     assert(!tickEvent.scheduled());
 268
 269     notIdleFraction++;
 270     numCycles += tickToCycles(thread->lastActivate - thread->lastSuspend);
 271
 272     //Make sure ticks are still on multiples of cycles
 273     schedule(tickEvent, nextCycle(curTick() + ticks(delay)));
 274     _status = Running;
 275 }
 276
 277
 278 void
 279 AtomicSimpleCPU::suspendContext(int thread_num)
 280 {
 281     DPRINTF(SimpleCPU, "SuspendContext %d\n", thread_num);
 282
 283     assert(thread_num == 0);
 284     assert(thread);
 285
 286     if (_status == Idle)
 287         return;
 288
 289     assert(_status == Running);
 290
 291     // tick event may not be scheduled if this gets called from inside
 292     // an instruction's execution, e.g. "quiesce"
 293     if (tickEvent.scheduled())
 294         deschedule(tickEvent);
 295
 296     notIdleFraction--;
 297     _status = Idle;
 298 }
 299
 300
 301 Fault
 302 AtomicSimpleCPU::readMem(Addr addr, uint8_t * data,
 303                          unsigned size, unsigned flags)
 304 {
 305     // use the CPU's statically allocated read request and packet objects
 306     Request *req = &data_read_req;
 307
 308     if (traceData) {
 309         traceData->setAddr(addr);
 310     }
 311
 312     //The block size of our peer.
 313     unsigned blockSize = dcachePort.peerBlockSize();
 314     //The size of the data we're trying to read.
 315     int fullSize = size;
 316
 317     //The address of the second part of this access if it needs to be split
 318     //across a cache line boundary.
 319     Addr secondAddr = roundDown(addr + size - 1, blockSize);
 320
 321     if (secondAddr > addr)
 322         size = secondAddr - addr;
 323
 324     dcache_latency = 0;
 325
 326     while (1) {
 327         req->setVirt(0, addr, size, flags, thread->pcState().instAddr());
 328
 329         // translate to physical address
 330         Fault fault = thread->dtb->translateAtomic(req, tc, BaseTLB::Read);
 331
 332         // Now do the access.
 333         if (fault == NoFault && !req->getFlags().isSet(Request::NO_ACCESS)) {
 334             Packet pkt = Packet(req,
 335                     req->isLLSC() ? MemCmd::LoadLockedReq : MemCmd::ReadReq,
 336                     Packet::Broadcast);
 337             pkt.dataStatic(data);
 338
 339             if (req->isMmappedIpr())
 340                 dcache_latency += TheISA::handleIprRead(thread->getTC(), &pkt);
 341             else {
 342                 if (hasPhysMemPort && pkt.getAddr() == physMemAddr)
 343                     dcache_latency += physmemPort.sendAtomic(&pkt);
 344                 else
 345                     dcache_latency += dcachePort.sendAtomic(&pkt);
 346             }
 347             dcache_access = true;
 348
 349             assert(!pkt.isError());
 350
 351             if (req->isLLSC()) {
 352                 TheISA::handleLockedRead(thread, req);
 353             }
 354         }
 355
 356         //If there's a fault, return it
 357         if (fault != NoFault) {
 358             if (req->isPrefetch()) {
 359                 return NoFault;
 360             } else {
 361                 return fault;
 362             }
 363         }
 364
 365         //If we don't need to access a second cache line, stop now.
 366         if (secondAddr <= addr)
 367         {
 368             if (req->isLocked() && fault == NoFault) {
 369                 assert(!locked);
 370                 locked = true;
 371             }
 372             return fault;
 373         }
 374
 375         /*
 376          * Set up for accessing the second cache line.
 377          */
 378
 379         //Move the pointer we're reading into to the correct location.
 380         data += size;
 381         //Adjust the size to get the remaining bytes.
 382         size = addr + fullSize - secondAddr;
 383         //And access the right address.
 384         addr = secondAddr;
 385     }
 386 }
 387
 388
 389 Fault
 390 AtomicSimpleCPU::writeMem(uint8_t *data, unsigned size,
 391                           Addr addr, unsigned flags, uint64_t *res)
 392 {
 393     // use the CPU's statically allocated write request and packet objects
 394     Request *req = &data_write_req;
 395
 396     if (traceData) {
 397         traceData->setAddr(addr);
 398     }
 399
 400     //The block size of our peer.
 401     unsigned blockSize = dcachePort.peerBlockSize();
 402     //The size of the data we're trying to read.
 403     int fullSize = size;
 404
 405     //The address of the second part of this access if it needs to be split
 406     //across a cache line boundary.
 407     Addr secondAddr = roundDown(addr + size - 1, blockSize);
 408
 409     if(secondAddr > addr)
 410         size = secondAddr - addr;
 411
 412     dcache_latency = 0;
 413
 414     while(1) {
 415         req->setVirt(0, addr, size, flags, thread->pcState().instAddr());
 416
 417         // translate to physical address
 418         Fault fault = thread->dtb->translateAtomic(req, tc, BaseTLB::Write);
 419
 420         // Now do the access.
 421         if (fault == NoFault) {
 422             MemCmd cmd = MemCmd::WriteReq; // default
 423             bool do_access = true;  // flag to suppress cache access
 424
 425             if (req->isLLSC()) {
 426                 cmd = MemCmd::StoreCondReq;
 427                 do_access = TheISA::handleLockedWrite(thread, req);
 428             } else if (req->isSwap()) {
 429                 cmd = MemCmd::SwapReq;
 430                 if (req->isCondSwap()) {
 431                     assert(res);
 432                     req->setExtraData(*res);
 433                 }
 434             }
 435
 436             if (do_access && !req->getFlags().isSet(Request::NO_ACCESS)) {
 437                 Packet pkt = Packet(req, cmd, Packet::Broadcast);
 438                 pkt.dataStatic(data);
 439
 440                 if (req->isMmappedIpr()) {
 441                     dcache_latency +=
 442                         TheISA::handleIprWrite(thread->getTC(), &pkt);
 443                 } else {
 444                     if (hasPhysMemPort && pkt.getAddr() == physMemAddr)
 445                         dcache_latency += physmemPort.sendAtomic(&pkt);
 446                     else
 447                         dcache_latency += dcachePort.sendAtomic(&pkt);
 448                 }
 449                 dcache_access = true;
 450                 assert(!pkt.isError());
 451
 452                 if (req->isSwap()) {
 453                     assert(res);
 454                     memcpy(res, pkt.getPtr<uint8_t>(), fullSize);
 455                 }
 456             }
 457
 458             if (res && !req->isSwap()) {
 459                 *res = req->getExtraData();
 460             }
 461         }
 462
 463         //If there's a fault or we don't need to access a second cache line,
 464         //stop now.
 465         if (fault != NoFault || secondAddr <= addr)
 466         {
 467             if (req->isLocked() && fault == NoFault) {
 468                 assert(locked);
 469                 locked = false;
 470             }
 471             if (fault != NoFault && req->isPrefetch()) {
 472                 return NoFault;
 473             } else {
 474                 return fault;
 475             }
 476         }
 477
 478         /*
 479          * Set up for accessing the second cache line.
 480          */
 481
 482         //Move the pointer we're reading into to the correct location.
 483         data += size;
 484         //Adjust the size to get the remaining bytes.
 485         size = addr + fullSize - secondAddr;
 486         //And access the right address.
 487         addr = secondAddr;
 488     }
 489 }
 490
 491
 492 void
 493 AtomicSimpleCPU::tick()
 494 {
 495     DPRINTF(SimpleCPU, "Tick\n");
 496
 497     Tick latency = 0;
 498
 499     for (int i = 0; i < width || locked; ++i) {
 500         numCycles++;
 501
 502         if (!curStaticInst || !curStaticInst->isDelayedCommit())
 503             checkForInterrupts();
 504
 505         checkPcEventQueue();
 506         // We must have just got suspended by a PC event
 507         if (_status == Idle)
 508             return;
 509
 510         Fault fault = NoFault;
 511
 512         TheISA::PCState pcState = thread->pcState();
 513
 514         bool needToFetch = !isRomMicroPC(pcState.microPC()) &&
 515                            !curMacroStaticInst;
 516         if (needToFetch) {
 517             setupFetchRequest(&ifetch_req);
 518             fault = thread->itb->translateAtomic(&ifetch_req, tc,
 519                                                  BaseTLB::Execute);
 520         }
 521
 522         if (fault == NoFault) {
 523             Tick icache_latency = 0;
 524             bool icache_access = false;
 525             dcache_access = false; // assume no dcache access
 526
 527             if (needToFetch) {
 528                 // This is commented out because the predecoder would act like
 529                 // a tiny cache otherwise. It wouldn't be flushed when needed
 530                 // like the I cache. It should be flushed, and when that works
 531                 // this code should be uncommented.
 532                 //Fetch more instruction memory if necessary
 533                 //if(predecoder.needMoreBytes())
 534                 //{
 535                     icache_access = true;
 536                     Packet ifetch_pkt = Packet(&ifetch_req, MemCmd::ReadReq,
 537                                                Packet::Broadcast);
 538                     ifetch_pkt.dataStatic(&inst);
 539
 540                     if (hasPhysMemPort && ifetch_pkt.getAddr() == physMemAddr)
 541                         icache_latency = physmemPort.sendAtomic(&ifetch_pkt);
 542                     else
 543                         icache_latency = icachePort.sendAtomic(&ifetch_pkt);
 544
 545                     assert(!ifetch_pkt.isError());
 546
 547                     // ifetch_req is initialized to read the instruction directly
 548                     // into the CPU object's inst field.
 549                 //}
 550             }
 551
 552             preExecute();
 553
 554             if (curStaticInst) {
 555                 fault = curStaticInst->execute(this, traceData);
 556
 557                 // keep an instruction count
 558                 if (fault == NoFault)
 559                     countInst();
 560                 else if (traceData && !DTRACE(ExecFaulting)) {
 561                     delete traceData;
 562                     traceData = NULL;
 563                 }
 564
 565                 postExecute();
 566             }
 567
 568             // @todo remove me after debugging with legion done
 569             if (curStaticInst && (!curStaticInst->isMicroop() ||
 570                         curStaticInst->isFirstMicroop()))
 571                 instCnt++;
 572
 573             Tick stall_ticks = 0;
 574             if (simulate_inst_stalls && icache_access)
 575                 stall_ticks += icache_latency;
 576
 577             if (simulate_data_stalls && dcache_access)
 578                 stall_ticks += dcache_latency;
 579
 580             if (stall_ticks) {
 581                 Tick stall_cycles = stall_ticks / ticks(1);
 582                 Tick aligned_stall_ticks = ticks(stall_cycles);
 583
 584                 if (aligned_stall_ticks < stall_ticks)
 585                     aligned_stall_ticks += 1;
 586
 587                 latency += aligned_stall_ticks;
 588             }
 589
 590         }
 591         if(fault != NoFault || !stayAtPC)
 592             advancePC(fault);
 593     }
 594
 595     // instruction takes at least one cycle
 596     if (latency < ticks(1))
 597         latency = ticks(1);
 598
 599     if (_status != Idle)
 600         schedule(tickEvent, curTick() + latency);
 601 }
 602
 603
 604 void
 605 AtomicSimpleCPU::printAddr(Addr a)
 606 {
 607     dcachePort.printAddr(a);
 608 }
 609
 610
 611 ////////////////////////////////////////////////////////////////////////
 612 //
 613 //  AtomicSimpleCPU Simulation Object
 614 //
 615 AtomicSimpleCPU *
 616 AtomicSimpleCPUParams::create()
 617 {
 618     numThreads = 1;
 619     if (!FullSystem && workload.size() != 1)
 620         panic("only one workload allowed");
 621     return new AtomicSimpleCPU(this);
 622 }