2 * Copyright (c) 2012 ARM Limited
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder. You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
14 * Copyright (c) 2002-2005 The Regents of The University of Michigan
15 * All rights reserved.
17 * Redistribution and use in source and binary forms, with or without
18 * modification, are permitted provided that the following conditions are
19 * met: redistributions of source code must retain the above copyright
20 * notice, this list of conditions and the following disclaimer;
21 * redistributions in binary form must reproduce the above copyright
22 * notice, this list of conditions and the following disclaimer in the
23 * documentation and/or other materials provided with the distribution;
24 * neither the name of the copyright holders nor the names of its
25 * contributors may be used to endorse or promote products derived from
26 * this software without specific prior written permission.
28 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
29 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
31 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
32 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
33 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
34 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
35 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
36 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
37 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
38 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40 * Authors: Steve Reinhardt
43 #include "arch/locked_mem.hh"
44 #include "arch/mmapped_ipr.hh"
45 #include "arch/utility.hh"
46 #include "base/bigint.hh"
47 #include "base/output.hh"
48 #include "config/the_isa.hh"
49 #include "cpu/simple/atomic.hh"
50 #include "cpu/exetrace.hh"
51 #include "debug/Drain.hh"
52 #include "debug/ExecFaulting.hh"
53 #include "debug/SimpleCPU.hh"
54 #include "mem/packet.hh"
55 #include "mem/packet_access.hh"
56 #include "mem/physical.hh"
57 #include "params/AtomicSimpleCPU.hh"
58 #include "sim/faults.hh"
59 #include "sim/system.hh"
60 #include "sim/full_system.hh"
// Pull the ISA-specific namespace into file scope (gem5 convention for
// ISA-parameterised CPU models).
63 using namespace TheISA
;
// TickEvent constructor: runs at CPU-tick priority and keeps a back
// pointer to the owning CPU so process() can call into it.
// NOTE(review): the original body/brace lines are missing from this
// extraction.
65 AtomicSimpleCPU::TickEvent::TickEvent(AtomicSimpleCPU
*c
)
66 : Event(CPU_Tick_Pri
), cpu(c
)
// Event callback fired on each scheduled tick. Body lines are not
// visible in this extraction (presumably forwards to cpu->tick() --
// TODO confirm against the full source).
72 AtomicSimpleCPU::TickEvent::process()
// Human-readable event name used by event tracing / debug output.
78 AtomicSimpleCPU::TickEvent::description() const
80 return "AtomicSimpleCPU tick";
// One-time initialisation: wire up the thread context's memory proxies,
// run ISA-level CPU init for every context (full-system mode only, and
// only when this CPU is not switched out), then pre-set context IDs on
// the statically allocated fetch/read/write requests.
84 AtomicSimpleCPU::init()
88 // Initialise the ThreadContext's memory proxies
89 tcBase()->initMemProxies(tcBase());
// ISA-level init (including PC setup) applies only in full system and
// only for a live (non-switched-out) CPU.
91 if (FullSystem
&& !params()->switched_out
) {
92 ThreadID size
= threadContexts
.size();
93 for (ThreadID i
= 0; i
< size
; ++i
) {
94 ThreadContext
*tc
= threadContexts
[i
];
95 // initialize CPU, including PC
96 TheISA::initCPU(tc
, tc
->contextId());
100 // Atomic doesn't do MT right now, so contextId == threadId
101 ifetch_req
.setThreadContext(_cpuId
, 0); // Add thread ID if we add MT
102 data_read_req
.setThreadContext(_cpuId
, 0); // Add thread ID here too
103 data_write_req
.setThreadContext(_cpuId
, 0); // Add thread ID here too
// Constructor: forwards params to BaseSimpleCPU and initialises the
// tick event, per-tick execution width, LLSC lock flag, stall-modelling
// switches, the two cache ports, and SimPoint profiling state.
106 AtomicSimpleCPU::AtomicSimpleCPU(AtomicSimpleCPUParams
*p
)
107 : BaseSimpleCPU(p
), tickEvent(this), width(p
->width
), locked(false),
108 simulate_data_stalls(p
->simulate_data_stalls
),
109 simulate_inst_stalls(p
->simulate_inst_stalls
),
111 icachePort(name() + ".icache_port", this),
112 dcachePort(name() + ".dcache_port", this),
114 simpoint(p
->simpoint_profile
),
115 intervalSize(p
->simpoint_interval
),
118 simpointStream(NULL
),
120 currentBBVInstCount(0)
// Open the SimPoint BBV output stream. NOTE(review): the guard that
// presumably makes this conditional on simpoint profiling being enabled
// is missing from this extraction -- confirm against the full source.
125 simpointStream
= simout
.create(p
->simpoint_profile_file
, false);
// Destructor: cancel any pending tick event and close the SimPoint
// output stream if one was opened.
130 AtomicSimpleCPU::~AtomicSimpleCPU()
132 if (tickEvent
.scheduled()) {
133 deschedule(tickEvent
);
135 if (simpointStream
) {
136 simout
.close(simpointStream
);
// Begin draining the CPU: there must be no drain already in progress;
// deschedules the tick event so no further instructions execute.
// NOTE(review): the code that records the DrainManager and the return
// value lines are missing from this extraction.
141 AtomicSimpleCPU::drain(DrainManager
*dm
)
143 assert(!drain_manager
);
148 DPRINTF(Drain
, "Requesting drain: %s\n", pcState());
// Stop ticking while draining.
152 if (tickEvent
.scheduled())
153 deschedule(tickEvent
);
155 DPRINTF(Drain
, "Not executing microcode, no need to drain.\n");
// Resume after a drain: reschedule the tick event if the (single)
// thread is active, otherwise go idle. The atomic CPU supports exactly
// one thread context.
161 AtomicSimpleCPU::drainResume()
163 assert(!tickEvent
.scheduled());
164 assert(!drain_manager
);
168 DPRINTF(SimpleCPU
, "Resume\n");
171 assert(!threadContexts
.empty());
172 if (threadContexts
.size() > 1)
173 fatal("The atomic CPU only supports one thread.\n");
175 if (thread
->status() == ThreadContext::Active
) {
176 schedule(tickEvent
, nextCycle());
177 _status
= BaseSimpleCPU::Running
;
180 _status
= BaseSimpleCPU::Idle
;
// Reset the system-wide committed-instruction counter on resume.
184 system
->totalNumInsts
= 0;
// If a drain is pending and the CPU has reached a drainable state,
// signal the DrainManager that draining is done and clear the pending
// pointer. NOTE(review): the guard conditions are missing from this
// extraction -- confirm against the full source.
188 AtomicSimpleCPU::tryCompleteDrain()
193 DPRINTF(Drain
, "tryCompleteDrain: %s\n", pcState());
197 DPRINTF(Drain
, "CPU done draining, processing drain event\n");
198 drain_manager
->signalDrainDone();
199 drain_manager
= NULL
;
// Switch this CPU out (e.g. for CPU model switching): delegate to the
// base class, then verify the tick event is gone and the CPU was in a
// sane state (Running or Idle).
206 AtomicSimpleCPU::switchOut()
208 BaseSimpleCPU::switchOut();
210 assert(!tickEvent
.scheduled());
211 assert(_status
== BaseSimpleCPU::Running
|| _status
== Idle
);
// Take over execution state from another CPU model: delegate to the
// base class, then re-stamp the context IDs on the statically allocated
// requests (mirrors init()).
217 AtomicSimpleCPU::takeOverFrom(BaseCPU
*oldCPU
)
219 BaseSimpleCPU::takeOverFrom(oldCPU
);
221 // The tick event should have been descheduled by drain()
222 assert(!tickEvent
.scheduled());
224 ifetch_req
.setThreadContext(_cpuId
, 0); // Add thread ID if we add MT
225 data_read_req
.setThreadContext(_cpuId
, 0); // Add thread ID here too
226 data_write_req
.setThreadContext(_cpuId
, 0); // Add thread ID here too
// Sanity check: the atomic CPU can only run against a memory system in
// atomic mode; abort the simulation otherwise. NOTE(review): the tail
// of the fatal() message is missing from this extraction.
230 AtomicSimpleCPU::verifyMemoryMode() const
232 if (!system
->isAtomicMode()) {
233 fatal("The atomic CPU requires the memory system to be in "
// Activate the (single) thread context after `delay` cycles: account
// the cycles spent suspended, schedule the tick event on a clock edge,
// and mark the CPU Running.
239 AtomicSimpleCPU::activateContext(ThreadID thread_num
, Cycles delay
)
241 DPRINTF(SimpleCPU
, "ActivateContext %d (%d cycles)\n", thread_num
, delay
);
// Only one thread is supported; must currently be idle with no tick
// event pending.
243 assert(thread_num
== 0);
246 assert(_status
== Idle
);
247 assert(!tickEvent
.scheduled());
// Credit the cycles that elapsed while the thread was suspended.
250 numCycles
+= ticksToCycles(thread
->lastActivate
- thread
->lastSuspend
);
252 //Make sure ticks are still on multiples of cycles
253 schedule(tickEvent
, clockEdge(delay
));
254 _status
= BaseSimpleCPU::Running
;
// Suspend the (single) thread context: deschedule the tick event if it
// is pending. The CPU must currently be Running.
259 AtomicSimpleCPU::suspendContext(ThreadID thread_num
)
261 DPRINTF(SimpleCPU
, "SuspendContext %d\n", thread_num
);
263 assert(thread_num
== 0);
269 assert(_status
== BaseSimpleCPU::Running
);
271 // tick event may not be scheduled if this gets called from inside
272 // an instruction's execution, e.g. "quiesce"
273 if (tickEvent
.scheduled())
274 deschedule(tickEvent
);
// Atomic-mode data read of `size` bytes at virtual address `addr` into
// `data`. Handles accesses that straddle a cache line by splitting into
// two sub-accesses (first part here, second part set up at the bottom).
// Each part is: set up the request, translate via the DTB, then access
// memory-mapped IPR / physical memory / the dcache port as appropriate.
// Returns the translation fault, if any. NOTE(review): several original
// lines (braces, loop structure, fullSize setup, returns) are missing
// from this extraction.
282 AtomicSimpleCPU::readMem(Addr addr
, uint8_t * data
,
283 unsigned size
, unsigned flags
)
285 // use the CPU's statically allocated read request and packet objects
286 Request
*req
= &data_read_req
;
289 traceData
->setAddr(addr
);
292 //The size of the data we're trying to read.
295 //The address of the second part of this access if it needs to be split
296 //across a cache line boundary.
297 Addr secondAddr
= roundDown(addr
+ size
- 1, cacheLineSize());
// If the access crosses a line boundary, shrink the first part so it
// ends exactly at the boundary.
299 if (secondAddr
> addr
)
300 size
= secondAddr
- addr
;
305 req
->setVirt(0, addr
, size
, flags
, dataMasterId(), thread
->pcState().instAddr());
307 // translate to physical address
308 Fault fault
= thread
->dtb
->translateAtomic(req
, tc
, BaseTLB::Read
);
310 // Now do the access.
311 if (fault
== NoFault
&& !req
->getFlags().isSet(Request::NO_ACCESS
)) {
312 Packet pkt
= Packet(req
,
313 req
->isLLSC() ? MemCmd::LoadLockedReq
:
315 pkt
.dataStatic(data
);
// Memory-mapped IPR reads are handled by the ISA; otherwise go to
// physical memory directly (fastmem) or through the dcache port.
317 if (req
->isMmappedIpr())
318 dcache_latency
+= TheISA::handleIprRead(thread
->getTC(), &pkt
);
320 if (fastmem
&& system
->isMemAddr(pkt
.getAddr()))
321 system
->getPhysMem().access(&pkt
);
323 dcache_latency
+= dcachePort
.sendAtomic(&pkt
);
325 dcache_access
= true;
327 assert(!pkt
.isError());
// Track LLSC (load-locked) state for this thread.
330 TheISA::handleLockedRead(thread
, req
);
334 //If there's a fault, return it
335 if (fault
!= NoFault
) {
336 if (req
->isPrefetch()) {
343 //If we don't need to access a second cache line, stop now.
344 if (secondAddr
<= addr
)
346 if (req
->isLocked() && fault
== NoFault
) {
354 * Set up for accessing the second cache line.
357 //Move the pointer we're reading into to the correct location.
359 //Adjust the size to get the remaining bytes.
360 size
= addr
+ fullSize
- secondAddr
;
361 //And access the right address.
// Atomic-mode data write of `size` bytes from `data` to virtual address
// `addr`; `res` (if non-null) receives store-conditional / swap result
// data. Mirrors readMem(): split line-straddling accesses in two, and
// for each part translate via the DTB then access IPR / physical memory
// / the dcache port. Store-conditionals and swaps select the packet
// command and may suppress the actual access (failed SC). Returns the
// translation fault, if any. NOTE(review): several original lines
// (braces, loop structure, fullSize setup, returns) are missing from
// this extraction.
368 AtomicSimpleCPU::writeMem(uint8_t *data
, unsigned size
,
369 Addr addr
, unsigned flags
, uint64_t *res
)
371 // use the CPU's statically allocated write request and packet objects
372 Request
*req
= &data_write_req
;
375 traceData
->setAddr(addr
);
378 //The size of the data we're trying to read.
381 //The address of the second part of this access if it needs to be split
382 //across a cache line boundary.
383 Addr secondAddr
= roundDown(addr
+ size
- 1, cacheLineSize());
// Shrink the first part to end at the cache line boundary if split.
385 if(secondAddr
> addr
)
386 size
= secondAddr
- addr
;
391 req
->setVirt(0, addr
, size
, flags
, dataMasterId(), thread
->pcState().instAddr());
393 // translate to physical address
394 Fault fault
= thread
->dtb
->translateAtomic(req
, tc
, BaseTLB::Write
);
396 // Now do the access.
397 if (fault
== NoFault
) {
398 MemCmd cmd
= MemCmd::WriteReq
; // default
399 bool do_access
= true; // flag to suppress cache access
// Store-conditional: ISA decides whether the SC may proceed.
402 cmd
= MemCmd::StoreCondReq
;
403 do_access
= TheISA::handleLockedWrite(thread
, req
);
404 } else if (req
->isSwap()) {
405 cmd
= MemCmd::SwapReq
;
// Conditional swap carries the compare value as extra data.
406 if (req
->isCondSwap()) {
408 req
->setExtraData(*res
);
412 if (do_access
&& !req
->getFlags().isSet(Request::NO_ACCESS
)) {
413 Packet pkt
= Packet(req
, cmd
);
414 pkt
.dataStatic(data
);
416 if (req
->isMmappedIpr()) {
418 TheISA::handleIprWrite(thread
->getTC(), &pkt
);
420 if (fastmem
&& system
->isMemAddr(pkt
.getAddr()))
421 system
->getPhysMem().access(&pkt
);
423 dcache_latency
+= dcachePort
.sendAtomic(&pkt
);
425 dcache_access
= true;
426 assert(!pkt
.isError());
// Swap: the old memory value comes back in the packet; copy it out.
430 memcpy(res
, pkt
.getPtr
<uint8_t>(), fullSize
);
// Store-conditional result (success/failure) is reported via extra
// data on the request.
434 if (res
&& !req
->isSwap()) {
435 *res
= req
->getExtraData();
439 //If there's a fault or we don't need to access a second cache line,
441 if (fault
!= NoFault
|| secondAddr
<= addr
)
443 if (req
->isLocked() && fault
== NoFault
) {
447 if (fault
!= NoFault
&& req
->isPrefetch()) {
455 * Set up for accessing the second cache line.
458 //Move the pointer we're reading into to the correct location.
460 //Adjust the size to get the remaining bytes.
461 size
= addr
+ fullSize
- secondAddr
;
462 //And access the right address.
// Main execution loop, invoked by the tick event. Executes up to
// `width` instructions per tick (or keeps going while `locked`): check
// interrupts, fetch (via ITB translate + icache/physical access),
// execute, optionally profile for SimPoints, accumulate modelled
// icache/dcache stall latency, and reschedule the next tick. NOTE:
// many original lines (braces, fetch/commit details, early returns)
// are missing from this extraction.
469 AtomicSimpleCPU::tick()
471 DPRINTF(SimpleCPU
, "Tick\n");
475 for (int i
= 0; i
< width
|| locked
; ++i
) {
// Don't take interrupts in the middle of a delayed-commit (micro-op)
// sequence.
478 if (!curStaticInst
|| !curStaticInst
->isDelayedCommit())
479 checkForInterrupts();
482 // We must have just got suspended by a PC event
483 if (_status
== Idle
) {
488 Fault fault
= NoFault
;
490 TheISA::PCState pcState
= thread
->pcState();
// Instructions in the microcode ROM don't need a memory fetch.
492 bool needToFetch
= !isRomMicroPC(pcState
.microPC()) &&
495 setupFetchRequest(&ifetch_req
);
496 fault
= thread
->itb
->translateAtomic(&ifetch_req
, tc
,
500 if (fault
== NoFault
) {
501 Tick icache_latency
= 0;
502 bool icache_access
= false;
503 dcache_access
= false; // assume no dcache access
506 // This is commented out because the decoder would act like
507 // a tiny cache otherwise. It wouldn't be flushed when needed
508 // like the I cache. It should be flushed, and when that works
509 // this code should be uncommented.
510 //Fetch more instruction memory if necessary
511 //if(decoder.needMoreBytes())
513 icache_access
= true;
514 Packet ifetch_pkt
= Packet(&ifetch_req
, MemCmd::ReadReq
);
515 ifetch_pkt
.dataStatic(&inst
);
// Fastmem bypasses the icache port and touches physical memory
// directly; otherwise send atomically through the icache port.
517 if (fastmem
&& system
->isMemAddr(ifetch_pkt
.getAddr()))
518 system
->getPhysMem().access(&ifetch_pkt
);
520 icache_latency
= icachePort
.sendAtomic(&ifetch_pkt
);
522 assert(!ifetch_pkt
.isError());
524 // ifetch_req is initialized to read the instruction directly
525 // into the CPU object's inst field.
// Execute the decoded instruction.
532 fault
= curStaticInst
->execute(this, traceData
);
534 // keep an instruction count
535 if (fault
== NoFault
)
537 else if (traceData
&& !DTRACE(ExecFaulting
)) {
545 // @todo remove me after debugging with legion done
546 if (curStaticInst
&& (!curStaticInst
->isMicroop() ||
547 curStaticInst
->isFirstMicroop()))
550 // profile for SimPoints if enabled and macro inst is finished
551 if (simpoint
&& curStaticInst
&& (fault
== NoFault
) &&
552 (!curStaticInst
->isMicroop() ||
553 curStaticInst
->isLastMicroop())) {
// Accumulate simulated stall time from the cache accesses made
// this iteration, if stall modelling is enabled.
557 Tick stall_ticks
= 0;
558 if (simulate_inst_stalls
&& icache_access
)
559 stall_ticks
+= icache_latency
;
561 if (simulate_data_stalls
&& dcache_access
)
562 stall_ticks
+= dcache_latency
;
565 // the atomic cpu does its accounting in ticks, so
566 // keep counting in ticks but round to the clock
568 latency
+= divCeil(stall_ticks
, clockPeriod()) *
// Advance past the instruction unless we faulted or must stay at
// the same PC (e.g. mid-macro-op).
573 if(fault
!= NoFault
|| !stayAtPC
)
577 if (tryCompleteDrain())
580 // instruction takes at least one cycle
581 if (latency
< clockPeriod())
582 latency
= clockPeriod();
585 schedule(tickEvent
, curTick() + latency
);
// Debug helper: forward an address to the dcache port's address
// printer.
590 AtomicSimpleCPU::printAddr(Addr a
)
592 dcachePort
.printAddr(a
);
// SimPoint basic-block-vector (BBV) profiling, called per committed
// macro instruction. Tracks the current basic block (start/end PC and
// instruction count); at each control instruction the block is looked
// up in bbMap (inserting a new entry with a fresh id on first sight,
// otherwise bumping its count). When the interval instruction budget is
// reached, the non-zero per-block counts are sorted by id and emitted
// as one "T:id:count ..." line to the simpoint stream, and the overshoot
// is carried into the next interval as drift. NOTE(review): some lines
// (braces, intervalCount updates, count resets) are missing from this
// extraction.
596 AtomicSimpleCPU::profileSimPoint()
// Starting a new basic block: record its first instruction's PC.
598 if (!currentBBVInstCount
)
599 currentBBV
.first
= thread
->pcState().instAddr();
602 ++currentBBVInstCount
;
604 // If inst is control inst, assume end of basic block.
605 if (curStaticInst
->isControl()) {
606 currentBBV
.second
= thread
->pcState().instAddr();
608 auto map_itr
= bbMap
.find(currentBBV
);
609 if (map_itr
== bbMap
.end()){
610 // If a new (previously unseen) basic block is found,
611 // add a new unique id, record num of insts and insert into bbMap.
613 info
.id
= bbMap
.size() + 1;
614 info
.insts
= currentBBVInstCount
;
615 info
.count
= currentBBVInstCount
;
616 bbMap
.insert(std::make_pair(currentBBV
, info
));
618 // If basic block is seen before, just increment the count by the
619 // number of insts in basic block.
620 BBInfo
& info
= map_itr
->second
;
621 assert(info
.insts
== currentBBVInstCount
);
622 info
.count
+= currentBBVInstCount
;
// Reset for the next basic block.
624 currentBBVInstCount
= 0;
626 // Reached end of interval if the sum of the current inst count
627 // (intervalCount) and the excessive inst count from the previous
628 // interval (intervalDrift) is greater than/equal to the interval size.
629 if (intervalCount
+ intervalDrift
>= intervalSize
) {
630 // summarize interval and display BBV info
631 std::vector
<pair
<uint64_t, uint64_t> > counts
;
// Collect (id, count) pairs for blocks touched this interval.
632 for (auto map_itr
= bbMap
.begin(); map_itr
!= bbMap
.end();
634 BBInfo
& info
= map_itr
->second
;
635 if (info
.count
!= 0) {
636 counts
.push_back(std::make_pair(info
.id
, info
.count
));
// Sort by block id so output order is deterministic.
640 std::sort(counts
.begin(), counts
.end());
642 // Print output BBV info
643 *simpointStream
<< "T";
644 for (auto cnt_itr
= counts
.begin(); cnt_itr
!= counts
.end();
646 *simpointStream
<< ":" << cnt_itr
->first
647 << ":" << cnt_itr
->second
<< " ";
649 *simpointStream
<< "\n";
// Carry the interval overshoot into the next interval.
651 intervalDrift
= (intervalCount
+ intervalDrift
) - intervalSize
;
657 ////////////////////////////////////////////////////////////////////////
659 // AtomicSimpleCPU Simulation Object
// Param-object factory: builds the AtomicSimpleCPU SimObject. In
// syscall-emulation mode exactly one workload process is required.
662 AtomicSimpleCPUParams::create()
665 if (!FullSystem
&& workload
.size() != 1)
666 panic("only one workload allowed");
667 return new AtomicSimpleCPU(this);