2 * Copyright (c) 2012 ARM Limited
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder. You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
14 * Copyright (c) 2002-2005 The Regents of The University of Michigan
15 * All rights reserved.
17 * Redistribution and use in source and binary forms, with or without
18 * modification, are permitted provided that the following conditions are
19 * met: redistributions of source code must retain the above copyright
20 * notice, this list of conditions and the following disclaimer;
21 * redistributions in binary form must reproduce the above copyright
22 * notice, this list of conditions and the following disclaimer in the
23 * documentation and/or other materials provided with the distribution;
24 * neither the name of the copyright holders nor the names of its
25 * contributors may be used to endorse or promote products derived from
26 * this software without specific prior written permission.
28 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
29 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
31 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
32 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
33 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
34 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
35 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
36 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
37 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
38 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40 * Authors: Steve Reinhardt
43 #include "arch/locked_mem.hh"
44 #include "arch/mmapped_ipr.hh"
45 #include "arch/utility.hh"
46 #include "base/bigint.hh"
47 #include "config/the_isa.hh"
48 #include "cpu/simple/atomic.hh"
49 #include "cpu/exetrace.hh"
50 #include "debug/Drain.hh"
51 #include "debug/ExecFaulting.hh"
52 #include "debug/SimpleCPU.hh"
53 #include "mem/packet.hh"
54 #include "mem/packet_access.hh"
55 #include "mem/physical.hh"
56 #include "params/AtomicSimpleCPU.hh"
57 #include "sim/faults.hh"
58 #include "sim/system.hh"
59 #include "sim/full_system.hh"
// Pull the ISA-specific namespace (TheISA) into scope for this translation unit.
62 using namespace TheISA
;
// TickEvent: wraps the CPU's per-cycle work as a schedulable event.
// The constructor remembers the owning CPU and runs at CPU tick priority.
// NOTE(review): the constructor body (likely an empty "{ }") is missing
// from this extract -- original line numbering jumps from 65 to 71.
64 AtomicSimpleCPU::TickEvent::TickEvent(AtomicSimpleCPU
*c
)
65 : Event(CPU_Tick_Pri
), cpu(c
)
// Event callback fired each scheduled tick. Presumably forwards to
// cpu->tick() -- the body and return type are missing from this extract,
// TODO confirm against the full source.
71 AtomicSimpleCPU::TickEvent::process()
// Human-readable name for this event, used by event-queue debug output.
77 AtomicSimpleCPU::TickEvent::description() const
79 return "AtomicSimpleCPU tick";
// Post-construction initialisation: set up the thread context's memory
// proxies, run ISA-level CPU init for each context in full-system mode,
// and point the CPU's statically allocated fetch/read/write requests at
// this CPU's context id.
// NOTE(review): interior lines (braces, any BaseSimpleCPU::init() call)
// are missing from this extract -- original numbering jumps 95 -> 99.
83 AtomicSimpleCPU::init()
87 // Initialise the ThreadContext's memory proxies
88 tcBase()->initMemProxies(tcBase());
// Full-system only: initialise each thread context (including the PC),
// unless this CPU starts switched out.
90 if (FullSystem
&& !params()->switched_out
) {
91 ThreadID size
= threadContexts
.size();
92 for (ThreadID i
= 0; i
< size
; ++i
) {
93 ThreadContext
*tc
= threadContexts
[i
];
94 // initialize CPU, including PC
95 TheISA::initCPU(tc
, tc
->contextId());
99 // Atomic doesn't do MT right now, so contextId == threadId
100 ifetch_req
.setThreadContext(_cpuId
, 0); // Add thread ID if we add MT
101 data_read_req
.setThreadContext(_cpuId
, 0); // Add thread ID here too
102 data_write_req
.setThreadContext(_cpuId
, 0); // Add thread ID here too
// Constructor: forwards params to BaseSimpleCPU and initialises the tick
// event, per-tick issue width, LL/SC lock flag, the stall-simulation
// switches, and the two cache ports (named after this CPU).
// NOTE(review): the initialiser list is truncated (e.g. fastmem and any
// drain-state members) and the constructor body is missing from this
// extract -- original numbering jumps 108 -> 110 and ends at 111.
105 AtomicSimpleCPU::AtomicSimpleCPU(AtomicSimpleCPUParams
*p
)
106 : BaseSimpleCPU(p
), tickEvent(this), width(p
->width
), locked(false),
107 simulate_data_stalls(p
->simulate_data_stalls
),
108 simulate_inst_stalls(p
->simulate_inst_stalls
),
110 icachePort(name() + ".icache_port", this),
111 dcachePort(name() + ".dcache_port", this),
// Destructor: make sure the tick event is not left on the event queue.
118 AtomicSimpleCPU::~AtomicSimpleCPU()
120 if (tickEvent
.scheduled()) {
121 deschedule(tickEvent
);
// Drain: stop executing so the CPU can be checkpointed or switched out.
// NOTE(review): the early-exit paths, the drain_manager assignment, and
// the return value are missing from this extract (original lines
// 129-132, 134-136 and 139+ absent).
126 AtomicSimpleCPU::drain(DrainManager
*dm
)
128 assert(!drain_manager
);
133 DPRINTF(Drain
, "Requesting drain: %s\n", pcState());
// Deschedule any outstanding tick so no further instructions execute
// while draining.
137 if (tickEvent
.scheduled())
138 deschedule(tickEvent
);
140 DPRINTF(Drain
, "Not executing microcode, no need to drain.\n");
// Resume after a drain: re-schedule the tick event if the (single)
// thread is active, otherwise go idle.
// NOTE(review): interior lines (e.g. an early return for a switched-out
// CPU, verifyMemoryMode(), the else keyword at original line 163) are
// missing from this extract.
146 AtomicSimpleCPU::drainResume()
148 assert(!tickEvent
.scheduled());
149 assert(!drain_manager
);
153 DPRINTF(SimpleCPU
, "Resume\n");
// The atomic CPU model supports exactly one hardware thread.
156 assert(!threadContexts
.empty());
157 if (threadContexts
.size() > 1)
158 fatal("The atomic CPU only supports one thread.\n");
160 if (thread
->status() == ThreadContext::Active
) {
161 schedule(tickEvent
, nextCycle());
162 _status
= BaseSimpleCPU::Running
;
164 _status
= BaseSimpleCPU::Idle
;
// Reset the system-wide instruction count on resume.
167 system
->totalNumInsts
= 0;
// If a drain has been requested and the CPU has reached a drainable
// point, signal the drain manager that draining is complete and clear
// the pending request.
// NOTE(review): the guard conditions and return statements are missing
// from this extract (original lines 172-175 and 177-179 absent).
171 AtomicSimpleCPU::tryCompleteDrain()
176 DPRINTF(Drain
, "tryCompleteDrain: %s\n", pcState());
180 DPRINTF(Drain
, "CPU done draining, processing drain event\n");
181 drain_manager
->signalDrainDone();
182 drain_manager
= NULL
;
// Switch this CPU out. The tick event must already have been
// descheduled (by drain()) and the CPU must be in a quiescent state.
189 AtomicSimpleCPU::switchOut()
191 BaseSimpleCPU::switchOut();
193 assert(!tickEvent
.scheduled());
194 assert(_status
== BaseSimpleCPU::Running
|| _status
== Idle
);
// Take over execution state from another CPU (e.g. when switching CPU
// models), then re-point the statically allocated memory requests at
// this CPU's context id -- mirrors the same setup done in init().
200 AtomicSimpleCPU::takeOverFrom(BaseCPU
*oldCPU
)
202 BaseSimpleCPU::takeOverFrom(oldCPU
);
204 // The tick event should have been descheduled by drain()
205 assert(!tickEvent
.scheduled());
207 ifetch_req
.setThreadContext(_cpuId
, 0); // Add thread ID if we add MT
208 data_read_req
.setThreadContext(_cpuId
, 0); // Add thread ID here too
209 data_write_req
.setThreadContext(_cpuId
, 0); // Add thread ID here too
// Sanity check: the atomic CPU can only run when the memory system is
// in atomic mode. NOTE(review): the fatal() message is truncated in
// this extract (the continuation string literal is missing).
213 AtomicSimpleCPU::verifyMemoryMode() const
215 if (!system
->isAtomicMode()) {
216 fatal("The atomic CPU requires the memory system to be in "
// Activate the (single) thread context after `delay` cycles: credit the
// time spent suspended to the cycle counter, then start ticking again.
// NOTE(review): interior lines (e.g. any notIdleFraction/stat updates)
// are missing from this extract -- original numbering jumps 226 -> 229.
222 AtomicSimpleCPU::activateContext(ThreadID thread_num
, Cycles delay
)
224 DPRINTF(SimpleCPU
, "ActivateContext %d (%d cycles)\n", thread_num
, delay
);
// Only thread 0 exists on this CPU model.
226 assert(thread_num
== 0);
229 assert(_status
== Idle
);
230 assert(!tickEvent
.scheduled());
// Account for the cycles elapsed between suspension and reactivation.
233 numCycles
+= ticksToCycles(thread
->lastActivate
- thread
->lastSuspend
);
235 //Make sure ticks are still on multiples of cycles
236 schedule(tickEvent
, clockEdge(delay
));
237 _status
= BaseSimpleCPU::Running
;
// Suspend the (single) thread context: stop the tick event if it is on
// the queue. NOTE(review): interior lines (e.g. an early return when
// already Idle, and the transition of _status to Idle) are missing from
// this extract -- original numbering jumps 246 -> 252 and ends at 257.
242 AtomicSimpleCPU::suspendContext(ThreadID thread_num
)
244 DPRINTF(SimpleCPU
, "SuspendContext %d\n", thread_num
);
246 assert(thread_num
== 0);
252 assert(_status
== BaseSimpleCPU::Running
);
254 // tick event may not be scheduled if this gets called from inside
255 // an instruction's execution, e.g. "quiesce"
256 if (tickEvent
.scheduled())
257 deschedule(tickEvent
);
// Atomic-mode memory read: translate `addr` through the DTB, perform the
// access either as a memory-mapped IPR read, directly against physical
// memory (fastmem), or via the dcache port's atomic interface, and split
// accesses that straddle a cache-line boundary into two passes.
// NOTE(review): numerous interior lines are missing from this extract
// (the fullSize variable's declaration, the enclosing retry loop, fault
// returns, the LoadReq fallback of the ?: expression, and the
// second-pass addr/data adjustments) -- original numbering jumps
// repeatedly (277 -> 280, 285 -> 290, 315 -> 319, 321 -> 328, ...).
265 AtomicSimpleCPU::readMem(Addr addr
, uint8_t * data
,
266 unsigned size
, unsigned flags
)
268 // use the CPU's statically allocated read request and packet objects
269 Request
*req
= &data_read_req
;
// Record the virtual address in the trace record (when tracing).
272 traceData
->setAddr(addr
);
275 //The block size of our peer.
276 unsigned blockSize
= dcachePort
.peerBlockSize();
277 //The size of the data we're trying to read.
280 //The address of the second part of this access if it needs to be split
281 //across a cache line boundary.
282 Addr secondAddr
= roundDown(addr
+ size
- 1, blockSize
);
// If the access crosses a line boundary, shrink this pass to the first
// fragment; the remainder is read by the second pass set up below.
284 if (secondAddr
> addr
)
285 size
= secondAddr
- addr
;
290 req
->setVirt(0, addr
, size
, flags
, dataMasterId(), thread
->pcState().instAddr());
292 // translate to physical address
293 Fault fault
= thread
->dtb
->translateAtomic(req
, tc
, BaseTLB::Read
);
295 // Now do the access.
296 if (fault
== NoFault
&& !req
->getFlags().isSet(Request::NO_ACCESS
)) {
297 Packet pkt
= Packet(req
,
298 req
->isLLSC() ? MemCmd::LoadLockedReq
:
300 pkt
.dataStatic(data
);
// Memory-mapped IPR accesses are handled by the ISA, bypassing memory.
302 if (req
->isMmappedIpr())
303 dcache_latency
+= TheISA::handleIprRead(thread
->getTC(), &pkt
);
// fastmem: access physical memory directly instead of using the port.
305 if (fastmem
&& system
->isMemAddr(pkt
.getAddr()))
306 system
->getPhysMem().access(&pkt
);
308 dcache_latency
+= dcachePort
.sendAtomic(&pkt
);
310 dcache_access
= true;
312 assert(!pkt
.isError());
// Record LL/SC monitor state for load-locked requests.
315 TheISA::handleLockedRead(thread
, req
);
319 //If there's a fault, return it
320 if (fault
!= NoFault
) {
321 if (req
->isPrefetch()) {
328 //If we don't need to access a second cache line, stop now.
329 if (secondAddr
<= addr
)
331 if (req
->isLocked() && fault
== NoFault
) {
339 * Set up for accessing the second cache line.
342 //Move the pointer we're reading into to the correct location.
344 //Adjust the size to get the remaining bytes.
345 size
= addr
+ fullSize
- secondAddr
;
346 //And access the right address.
// Atomic-mode memory write: translate `addr` through the DTB, pick the
// command (WriteReq / StoreCondReq for LL-SC / SwapReq), perform the
// access (IPR, fastmem, or dcache port), and split accesses that
// straddle a cache-line boundary into two passes. `res` receives the
// store-conditional / swap result when non-null.
// NOTE(review): numerous interior lines are missing from this extract
// (the fullSize declaration, the enclosing retry loop, the isLLSC()
// test guarding the StoreCondReq branch at original line ~388, fault
// returns, and the second-pass addr/data adjustments) -- original
// numbering jumps repeatedly (386 -> 389, 395 -> 399, 413 -> 417, ...).
353 AtomicSimpleCPU::writeMem(uint8_t *data
, unsigned size
,
354 Addr addr
, unsigned flags
, uint64_t *res
)
356 // use the CPU's statically allocated write request and packet objects
357 Request
*req
= &data_write_req
;
// Record the virtual address in the trace record (when tracing).
360 traceData
->setAddr(addr
);
363 //The block size of our peer.
364 unsigned blockSize
= dcachePort
.peerBlockSize();
365 //The size of the data we're trying to read.
368 //The address of the second part of this access if it needs to be split
369 //across a cache line boundary.
370 Addr secondAddr
= roundDown(addr
+ size
- 1, blockSize
);
// If the access crosses a line boundary, shrink this pass to the first
// fragment; the remainder is written by the second pass set up below.
372 if(secondAddr
> addr
)
373 size
= secondAddr
- addr
;
378 req
->setVirt(0, addr
, size
, flags
, dataMasterId(), thread
->pcState().instAddr());
380 // translate to physical address
381 Fault fault
= thread
->dtb
->translateAtomic(req
, tc
, BaseTLB::Write
);
383 // Now do the access.
384 if (fault
== NoFault
) {
385 MemCmd cmd
= MemCmd::WriteReq
; // default
386 bool do_access
= true; // flag to suppress cache access
// Store-conditional: the ISA decides whether the store may proceed.
389 cmd
= MemCmd::StoreCondReq
;
390 do_access
= TheISA::handleLockedWrite(thread
, req
);
391 } else if (req
->isSwap()) {
392 cmd
= MemCmd::SwapReq
;
// Conditional swap carries the comparison value as extra data.
393 if (req
->isCondSwap()) {
395 req
->setExtraData(*res
);
399 if (do_access
&& !req
->getFlags().isSet(Request::NO_ACCESS
)) {
400 Packet pkt
= Packet(req
, cmd
);
401 pkt
.dataStatic(data
);
// Memory-mapped IPR writes are handled by the ISA, bypassing memory.
403 if (req
->isMmappedIpr()) {
405 TheISA::handleIprWrite(thread
->getTC(), &pkt
);
// fastmem: access physical memory directly instead of using the port.
407 if (fastmem
&& system
->isMemAddr(pkt
.getAddr()))
408 system
->getPhysMem().access(&pkt
);
410 dcache_latency
+= dcachePort
.sendAtomic(&pkt
);
412 dcache_access
= true;
413 assert(!pkt
.isError());
// Swap result: copy the pre-swap memory contents back to the caller.
417 memcpy(res
, pkt
.getPtr
<uint8_t>(), fullSize
);
// Store-conditional result: report success/failure via extra data.
421 if (res
&& !req
->isSwap()) {
422 *res
= req
->getExtraData();
426 //If there's a fault or we don't need to access a second cache line,
428 if (fault
!= NoFault
|| secondAddr
<= addr
)
430 if (req
->isLocked() && fault
== NoFault
) {
434 if (fault
!= NoFault
&& req
->isPrefetch()) {
442 * Set up for accessing the second cache line.
445 //Move the pointer we're reading into to the correct location.
447 //Adjust the size to get the remaining bytes.
448 size
= addr
+ fullSize
- secondAddr
;
449 //And access the right address.
// Main execution loop, run once per scheduled tick: for up to `width`
// instructions (or while an LL/SC sequence holds `locked`), check for
// interrupts, fetch (via ITB translate + icache port or fastmem),
// execute the decoded static instruction, accumulate simulated stall
// latency, and re-schedule the tick event.
// NOTE(review): numerous interior lines are missing from this extract
// (numCycles accounting, the fetch/decode calls between fetch and
// execute, preExecute/postExecute, instruction-count updates, the
// advancePC call, latency initialisation, and several closing braces) --
// original numbering jumps repeatedly (458 -> 462, 512 -> 519, ...).
456 AtomicSimpleCPU::tick()
458 DPRINTF(SimpleCPU
, "Tick\n");
// Execute up to `width` instructions per tick; keep going while an
// LL/SC sequence is in flight (`locked`).
462 for (int i
= 0; i
< width
|| locked
; ++i
) {
// Only poll for interrupts between committed (non-delayed) ops.
465 if (!curStaticInst
|| !curStaticInst
->isDelayedCommit())
466 checkForInterrupts();
469 // We must have just got suspended by a PC event
470 if (_status
== Idle
) {
475 Fault fault
= NoFault
;
477 TheISA::PCState pcState
= thread
->pcState();
// Instructions in the microcode ROM don't need an icache fetch.
479 bool needToFetch
= !isRomMicroPC(pcState
.microPC()) &&
482 setupFetchRequest(&ifetch_req
);
483 fault
= thread
->itb
->translateAtomic(&ifetch_req
, tc
,
487 if (fault
== NoFault
) {
488 Tick icache_latency
= 0;
489 bool icache_access
= false;
490 dcache_access
= false; // assume no dcache access
493 // This is commented out because the decoder would act like
494 // a tiny cache otherwise. It wouldn't be flushed when needed
495 // like the I cache. It should be flushed, and when that works
496 // this code should be uncommented.
497 //Fetch more instruction memory if necessary
498 //if(decoder.needMoreBytes())
500 icache_access
= true;
501 Packet ifetch_pkt
= Packet(&ifetch_req
, MemCmd::ReadReq
);
502 ifetch_pkt
.dataStatic(&inst
);
// fastmem: fetch directly from physical memory when possible.
504 if (fastmem
&& system
->isMemAddr(ifetch_pkt
.getAddr()))
505 system
->getPhysMem().access(&ifetch_pkt
);
507 icache_latency
= icachePort
.sendAtomic(&ifetch_pkt
);
509 assert(!ifetch_pkt
.isError());
511 // ifetch_req is initialized to read the instruction directly
512 // into the CPU object's inst field.
// Execute the decoded instruction; faults propagate to the handler
// below. NOTE(review): the preExecute/decode steps between fetch and
// execute are missing from this extract.
519 fault
= curStaticInst
->execute(this, traceData
);
521 // keep an instruction count
522 if (fault
== NoFault
)
524 else if (traceData
&& !DTRACE(ExecFaulting
)) {
532 // @todo remove me after debugging with legion done
533 if (curStaticInst
&& (!curStaticInst
->isMicroop() ||
534 curStaticInst
->isFirstMicroop()))
// Accumulate simulated stall time from the caches when enabled.
537 Tick stall_ticks
= 0;
538 if (simulate_inst_stalls
&& icache_access
)
539 stall_ticks
+= icache_latency
;
541 if (simulate_data_stalls
&& dcache_access
)
542 stall_ticks
+= dcache_latency
;
545 // the atomic cpu does its accounting in ticks, so
546 // keep counting in ticks but round to the clock
548 latency
+= divCeil(stall_ticks
, clockPeriod()) *
// Advance the PC unless the instruction faulted or must stay put.
553 if(fault
!= NoFault
|| !stayAtPC
)
557 if (tryCompleteDrain())
560 // instruction takes at least one cycle
561 if (latency
< clockPeriod())
562 latency
= clockPeriod();
// Re-arm the tick event for the next batch of instructions.
565 schedule(tickEvent
, curTick() + latency
);
// Debug helper: ask the dcache port to print routing/ownership
// information for address `a`.
570 AtomicSimpleCPU::printAddr(Addr a
)
572 dcachePort
.printAddr(a
);
576 ////////////////////////////////////////////////////////////////////////
578 // AtomicSimpleCPU Simulation Object
// Factory invoked from the Python config system: build an
// AtomicSimpleCPU from this parameter struct. In syscall-emulation mode
// exactly one workload process is required.
581 AtomicSimpleCPUParams::create()
584 if (!FullSystem
&& workload
.size() != 1)
585 panic("only one workload allowed");
586 return new AtomicSimpleCPU(this);