src/cpu/simple/atomic.cc

   1 /*
   2  * Copyright (c) 2002-2005 The Regents of The University of Michigan
   3  * All rights reserved.
   4  *
   5  * Redistribution and use in source and binary forms, with or without
   6  * modification, are permitted provided that the following conditions are
   7  * met: redistributions of source code must retain the above copyright
   8  * notice, this list of conditions and the following disclaimer;
   9  * redistributions in binary form must reproduce the above copyright
  10  * notice, this list of conditions and the following disclaimer in the
  11  * documentation and/or other materials provided with the distribution;
  12  * neither the name of the copyright holders nor the names of its
  13  * contributors may be used to endorse or promote products derived from
  14  * this software without specific prior written permission.
  15  *
  16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  17  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  18  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  19  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  20  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  21  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  22  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  26  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  27  *
  28  * Authors: Steve Reinhardt
  29  */
  30
  31 #include "arch/locked_mem.hh"
  32 #include "arch/mmapped_ipr.hh"
  33 #include "arch/utility.hh"
  34 #include "base/bigint.hh"
  35 #include "config/the_isa.hh"
  36 #include "cpu/simple/atomic.hh"
  37 #include "cpu/exetrace.hh"
  38 #include "debug/ExecFaulting.hh"
  39 #include "debug/SimpleCPU.hh"
  40 #include "mem/packet.hh"
  41 #include "mem/packet_access.hh"
  42 #include "params/AtomicSimpleCPU.hh"
  43 #include "sim/faults.hh"
  44 #include "sim/system.hh"
  45 #include "sim/full_system.hh"
  46
  47 using namespace std;
  48 using namespace TheISA;
  49
  50 AtomicSimpleCPU::TickEvent::TickEvent(AtomicSimpleCPU *c)
  51     : Event(CPU_Tick_Pri), cpu(c)
  52 {
  53 }
  54
  55
  56 void
  57 AtomicSimpleCPU::TickEvent::process()
  58 {
  59     cpu->tick();
  60 }
  61
  62 const char *
  63 AtomicSimpleCPU::TickEvent::description() const
  64 {
  65     return "AtomicSimpleCPU tick";
  66 }
  67
  68 Port *
  69 AtomicSimpleCPU::getPort(const string &if_name, int idx)
  70 {
  71     if (if_name == "dcache_port")
  72         return &dcachePort;
  73     else if (if_name == "icache_port")
  74         return &icachePort;
  75     else if (if_name == "physmem_port") {
  76         hasPhysMemPort = true;
  77         return &physmemPort;
  78     }
  79     else
  80         panic("No Such Port\n");
  81 }
  82
  83 void
  84 AtomicSimpleCPU::init()
  85 {
  86     BaseCPU::init();
  87     if (FullSystem) {
  88         ThreadID size = threadContexts.size();
  89         for (ThreadID i = 0; i < size; ++i) {
  90             ThreadContext *tc = threadContexts[i];
  91             // initialize CPU, including PC
  92             TheISA::initCPU(tc, tc->contextId());
  93         }
  94     }
  95
  96     // Initialise the ThreadContext's memory proxies
  97     tcBase()->initMemProxies(tcBase());
  98
  99     if (hasPhysMemPort) {
 100         AddrRangeList pmAddrList = physmemPort.getPeer()->getAddrRanges();
 101         physMemAddr = *pmAddrList.begin();
 102     }
 103     // Atomic doesn't do MT right now, so contextId == threadId
 104     ifetch_req.setThreadContext(_cpuId, 0); // Add thread ID if we add MT
 105     data_read_req.setThreadContext(_cpuId, 0); // Add thread ID here too
 106     data_write_req.setThreadContext(_cpuId, 0); // Add thread ID here too
 107 }
 108
 109 AtomicSimpleCPU::AtomicSimpleCPU(AtomicSimpleCPUParams *p)
 110     : BaseSimpleCPU(p), tickEvent(this), width(p->width), locked(false),
 111       simulate_data_stalls(p->simulate_data_stalls),
 112       simulate_inst_stalls(p->simulate_inst_stalls),
 113       icachePort(name() + "-iport", this), dcachePort(name() + "-iport", this),
 114       physmemPort(name() + "-iport", this), hasPhysMemPort(false)
 115 {
 116     _status = Idle;
 117 }
 118
 119
 120 AtomicSimpleCPU::~AtomicSimpleCPU()
 121 {
 122     if (tickEvent.scheduled()) {
 123         deschedule(tickEvent);
 124     }
 125 }
 126
 127 void
 128 AtomicSimpleCPU::serialize(ostream &os)
 129 {
 130     SimObject::State so_state = SimObject::getState();
 131     SERIALIZE_ENUM(so_state);
 132     SERIALIZE_SCALAR(locked);
 133     BaseSimpleCPU::serialize(os);
 134     nameOut(os, csprintf("%s.tickEvent", name()));
 135     tickEvent.serialize(os);
 136 }
 137
 138 void
 139 AtomicSimpleCPU::unserialize(Checkpoint *cp, const string &section)
 140 {
 141     SimObject::State so_state;
 142     UNSERIALIZE_ENUM(so_state);
 143     UNSERIALIZE_SCALAR(locked);
 144     BaseSimpleCPU::unserialize(cp, section);
 145     tickEvent.unserialize(cp, csprintf("%s.tickEvent", section));
 146 }
 147
 148 void
 149 AtomicSimpleCPU::resume()
 150 {
 151     if (_status == Idle || _status == SwitchedOut)
 152         return;
 153
 154     DPRINTF(SimpleCPU, "Resume\n");
 155     assert(system->getMemoryMode() == Enums::atomic);
 156
 157     changeState(SimObject::Running);
 158     if (thread->status() == ThreadContext::Active) {
 159         if (!tickEvent.scheduled())
 160             schedule(tickEvent, nextCycle());
 161     }
 162     system->totalNumInsts = 0;
 163 }
 164
 165 void
 166 AtomicSimpleCPU::switchOut()
 167 {
 168     assert(_status == Running || _status == Idle);
 169     _status = SwitchedOut;
 170
 171     tickEvent.squash();
 172 }
 173
 174
 175 void
 176 AtomicSimpleCPU::takeOverFrom(BaseCPU *oldCPU)
 177 {
 178     BaseCPU::takeOverFrom(oldCPU);
 179
 180     assert(!tickEvent.scheduled());
 181
 182     // if any of this CPU's ThreadContexts are active, mark the CPU as
 183     // running and schedule its tick event.
 184     ThreadID size = threadContexts.size();
 185     for (ThreadID i = 0; i < size; ++i) {
 186         ThreadContext *tc = threadContexts[i];
 187         if (tc->status() == ThreadContext::Active && _status != Running) {
 188             _status = Running;
 189             schedule(tickEvent, nextCycle());
 190             break;
 191         }
 192     }
 193     if (_status != Running) {
 194         _status = Idle;
 195     }
 196     assert(threadContexts.size() == 1);
 197     ifetch_req.setThreadContext(_cpuId, 0); // Add thread ID if we add MT
 198     data_read_req.setThreadContext(_cpuId, 0); // Add thread ID here too
 199     data_write_req.setThreadContext(_cpuId, 0); // Add thread ID here too
 200 }
 201
 202
 203 void
 204 AtomicSimpleCPU::activateContext(ThreadID thread_num, int delay)
 205 {
 206     DPRINTF(SimpleCPU, "ActivateContext %d (%d cycles)\n", thread_num, delay);
 207
 208     assert(thread_num == 0);
 209     assert(thread);
 210
 211     assert(_status == Idle);
 212     assert(!tickEvent.scheduled());
 213
 214     notIdleFraction++;
 215     numCycles += tickToCycles(thread->lastActivate - thread->lastSuspend);
 216
 217     //Make sure ticks are still on multiples of cycles
 218     schedule(tickEvent, nextCycle(curTick() + ticks(delay)));
 219     _status = Running;
 220 }
 221
 222
 223 void
 224 AtomicSimpleCPU::suspendContext(ThreadID thread_num)
 225 {
 226     DPRINTF(SimpleCPU, "SuspendContext %d\n", thread_num);
 227
 228     assert(thread_num == 0);
 229     assert(thread);
 230
 231     if (_status == Idle)
 232         return;
 233
 234     assert(_status == Running);
 235
 236     // tick event may not be scheduled if this gets called from inside
 237     // an instruction's execution, e.g. "quiesce"
 238     if (tickEvent.scheduled())
 239         deschedule(tickEvent);
 240
 241     notIdleFraction--;
 242     _status = Idle;
 243 }
 244
 245
 246 Fault
 247 AtomicSimpleCPU::readMem(Addr addr, uint8_t * data,
 248                          unsigned size, unsigned flags)
 249 {
 250     // use the CPU's statically allocated read request and packet objects
 251     Request *req = &data_read_req;
 252
 253     if (traceData) {
 254         traceData->setAddr(addr);
 255     }
 256
 257     //The block size of our peer.
 258     unsigned blockSize = dcachePort.peerBlockSize();
 259     //The size of the data we're trying to read.
 260     int fullSize = size;
 261
 262     //The address of the second part of this access if it needs to be split
 263     //across a cache line boundary.
 264     Addr secondAddr = roundDown(addr + size - 1, blockSize);
 265
 266     if (secondAddr > addr)
 267         size = secondAddr - addr;
 268
 269     dcache_latency = 0;
 270
 271     while (1) {
 272         req->setVirt(0, addr, size, flags, dataMasterId(), thread->pcState().instAddr());
 273
 274         // translate to physical address
 275         Fault fault = thread->dtb->translateAtomic(req, tc, BaseTLB::Read);
 276
 277         // Now do the access.
 278         if (fault == NoFault && !req->getFlags().isSet(Request::NO_ACCESS)) {
 279             Packet pkt = Packet(req,
 280                     req->isLLSC() ? MemCmd::LoadLockedReq : MemCmd::ReadReq,
 281                     Packet::Broadcast);
 282             pkt.dataStatic(data);
 283
 284             if (req->isMmappedIpr())
 285                 dcache_latency += TheISA::handleIprRead(thread->getTC(), &pkt);
 286             else {
 287                 if (hasPhysMemPort && pkt.getAddr() == physMemAddr)
 288                     dcache_latency += physmemPort.sendAtomic(&pkt);
 289                 else
 290                     dcache_latency += dcachePort.sendAtomic(&pkt);
 291             }
 292             dcache_access = true;
 293
 294             assert(!pkt.isError());
 295
 296             if (req->isLLSC()) {
 297                 TheISA::handleLockedRead(thread, req);
 298             }
 299         }
 300
 301         //If there's a fault, return it
 302         if (fault != NoFault) {
 303             if (req->isPrefetch()) {
 304                 return NoFault;
 305             } else {
 306                 return fault;
 307             }
 308         }
 309
 310         //If we don't need to access a second cache line, stop now.
 311         if (secondAddr <= addr)
 312         {
 313             if (req->isLocked() && fault == NoFault) {
 314                 assert(!locked);
 315                 locked = true;
 316             }
 317             return fault;
 318         }
 319
 320         /*
 321          * Set up for accessing the second cache line.
 322          */
 323
 324         //Move the pointer we're reading into to the correct location.
 325         data += size;
 326         //Adjust the size to get the remaining bytes.
 327         size = addr + fullSize - secondAddr;
 328         //And access the right address.
 329         addr = secondAddr;
 330     }
 331 }
 332
 333
 334 Fault
 335 AtomicSimpleCPU::writeMem(uint8_t *data, unsigned size,
 336                           Addr addr, unsigned flags, uint64_t *res)
 337 {
 338     // use the CPU's statically allocated write request and packet objects
 339     Request *req = &data_write_req;
 340
 341     if (traceData) {
 342         traceData->setAddr(addr);
 343     }
 344
 345     //The block size of our peer.
 346     unsigned blockSize = dcachePort.peerBlockSize();
 347     //The size of the data we're trying to read.
 348     int fullSize = size;
 349
 350     //The address of the second part of this access if it needs to be split
 351     //across a cache line boundary.
 352     Addr secondAddr = roundDown(addr + size - 1, blockSize);
 353
 354     if(secondAddr > addr)
 355         size = secondAddr - addr;
 356
 357     dcache_latency = 0;
 358
 359     while(1) {
 360         req->setVirt(0, addr, size, flags, dataMasterId(), thread->pcState().instAddr());
 361
 362         // translate to physical address
 363         Fault fault = thread->dtb->translateAtomic(req, tc, BaseTLB::Write);
 364
 365         // Now do the access.
 366         if (fault == NoFault) {
 367             MemCmd cmd = MemCmd::WriteReq; // default
 368             bool do_access = true;  // flag to suppress cache access
 369
 370             if (req->isLLSC()) {
 371                 cmd = MemCmd::StoreCondReq;
 372                 do_access = TheISA::handleLockedWrite(thread, req);
 373             } else if (req->isSwap()) {
 374                 cmd = MemCmd::SwapReq;
 375                 if (req->isCondSwap()) {
 376                     assert(res);
 377                     req->setExtraData(*res);
 378                 }
 379             }
 380
 381             if (do_access && !req->getFlags().isSet(Request::NO_ACCESS)) {
 382                 Packet pkt = Packet(req, cmd, Packet::Broadcast);
 383                 pkt.dataStatic(data);
 384
 385                 if (req->isMmappedIpr()) {
 386                     dcache_latency +=
 387                         TheISA::handleIprWrite(thread->getTC(), &pkt);
 388                 } else {
 389                     if (hasPhysMemPort && pkt.getAddr() == physMemAddr)
 390                         dcache_latency += physmemPort.sendAtomic(&pkt);
 391                     else
 392                         dcache_latency += dcachePort.sendAtomic(&pkt);
 393                 }
 394                 dcache_access = true;
 395                 assert(!pkt.isError());
 396
 397                 if (req->isSwap()) {
 398                     assert(res);
 399                     memcpy(res, pkt.getPtr<uint8_t>(), fullSize);
 400                 }
 401             }
 402
 403             if (res && !req->isSwap()) {
 404                 *res = req->getExtraData();
 405             }
 406         }
 407
 408         //If there's a fault or we don't need to access a second cache line,
 409         //stop now.
 410         if (fault != NoFault || secondAddr <= addr)
 411         {
 412             if (req->isLocked() && fault == NoFault) {
 413                 assert(locked);
 414                 locked = false;
 415             }
 416             if (fault != NoFault && req->isPrefetch()) {
 417                 return NoFault;
 418             } else {
 419                 return fault;
 420             }
 421         }
 422
 423         /*
 424          * Set up for accessing the second cache line.
 425          */
 426
 427         //Move the pointer we're reading into to the correct location.
 428         data += size;
 429         //Adjust the size to get the remaining bytes.
 430         size = addr + fullSize - secondAddr;
 431         //And access the right address.
 432         addr = secondAddr;
 433     }
 434 }
 435
 436
 437 void
 438 AtomicSimpleCPU::tick()
 439 {
 440     DPRINTF(SimpleCPU, "Tick\n");
 441
 442     Tick latency = 0;
 443
 444     for (int i = 0; i < width || locked; ++i) {
 445         numCycles++;
 446
 447         if (!curStaticInst || !curStaticInst->isDelayedCommit())
 448             checkForInterrupts();
 449
 450         checkPcEventQueue();
 451         // We must have just got suspended by a PC event
 452         if (_status == Idle)
 453             return;
 454
 455         Fault fault = NoFault;
 456
 457         TheISA::PCState pcState = thread->pcState();
 458
 459         bool needToFetch = !isRomMicroPC(pcState.microPC()) &&
 460                            !curMacroStaticInst;
 461         if (needToFetch) {
 462             setupFetchRequest(&ifetch_req);
 463             fault = thread->itb->translateAtomic(&ifetch_req, tc,
 464                                                  BaseTLB::Execute);
 465         }
 466
 467         if (fault == NoFault) {
 468             Tick icache_latency = 0;
 469             bool icache_access = false;
 470             dcache_access = false; // assume no dcache access
 471
 472             if (needToFetch) {
 473                 // This is commented out because the predecoder would act like
 474                 // a tiny cache otherwise. It wouldn't be flushed when needed
 475                 // like the I cache. It should be flushed, and when that works
 476                 // this code should be uncommented.
 477                 //Fetch more instruction memory if necessary
 478                 //if(predecoder.needMoreBytes())
 479                 //{
 480                     icache_access = true;
 481                     Packet ifetch_pkt = Packet(&ifetch_req, MemCmd::ReadReq,
 482                                                Packet::Broadcast);
 483                     ifetch_pkt.dataStatic(&inst);
 484
 485                     if (hasPhysMemPort && ifetch_pkt.getAddr() == physMemAddr)
 486                         icache_latency = physmemPort.sendAtomic(&ifetch_pkt);
 487                     else
 488                         icache_latency = icachePort.sendAtomic(&ifetch_pkt);
 489
 490                     assert(!ifetch_pkt.isError());
 491
 492                     // ifetch_req is initialized to read the instruction directly
 493                     // into the CPU object's inst field.
 494                 //}
 495             }
 496
 497             preExecute();
 498
 499             if (curStaticInst) {
 500                 fault = curStaticInst->execute(this, traceData);
 501
 502                 // keep an instruction count
 503                 if (fault == NoFault)
 504                     countInst();
 505                 else if (traceData && !DTRACE(ExecFaulting)) {
 506                     delete traceData;
 507                     traceData = NULL;
 508                 }
 509
 510                 postExecute();
 511             }
 512
 513             // @todo remove me after debugging with legion done
 514             if (curStaticInst && (!curStaticInst->isMicroop() ||
 515                         curStaticInst->isFirstMicroop()))
 516                 instCnt++;
 517
 518             Tick stall_ticks = 0;
 519             if (simulate_inst_stalls && icache_access)
 520                 stall_ticks += icache_latency;
 521
 522             if (simulate_data_stalls && dcache_access)
 523                 stall_ticks += dcache_latency;
 524
 525             if (stall_ticks) {
 526                 Tick stall_cycles = stall_ticks / ticks(1);
 527                 Tick aligned_stall_ticks = ticks(stall_cycles);
 528
 529                 if (aligned_stall_ticks < stall_ticks)
 530                     aligned_stall_ticks += 1;
 531
 532                 latency += aligned_stall_ticks;
 533             }
 534
 535         }
 536         if(fault != NoFault || !stayAtPC)
 537             advancePC(fault);
 538     }
 539
 540     // instruction takes at least one cycle
 541     if (latency < ticks(1))
 542         latency = ticks(1);
 543
 544     if (_status != Idle)
 545         schedule(tickEvent, curTick() + latency);
 546 }
 547
 548
 549 void
 550 AtomicSimpleCPU::printAddr(Addr a)
 551 {
 552     dcachePort.printAddr(a);
 553 }
 554
 555
 556 ////////////////////////////////////////////////////////////////////////
 557 //
 558 //  AtomicSimpleCPU Simulation Object
 559 //
 560 AtomicSimpleCPU *
 561 AtomicSimpleCPUParams::create()
 562 {
 563     numThreads = 1;
 564     if (!FullSystem && workload.size() != 1)
 565         panic("only one workload allowed");
 566     return new AtomicSimpleCPU(this);
 567 }