src/cpu/simple/atomic.cc

   1 /*
   2  * Copyright (c) 2002-2005 The Regents of The University of Michigan
   3  * All rights reserved.
   4  *
   5  * Redistribution and use in source and binary forms, with or without
   6  * modification, are permitted provided that the following conditions are
   7  * met: redistributions of source code must retain the above copyright
   8  * notice, this list of conditions and the following disclaimer;
   9  * redistributions in binary form must reproduce the above copyright
  10  * notice, this list of conditions and the following disclaimer in the
  11  * documentation and/or other materials provided with the distribution;
  12  * neither the name of the copyright holders nor the names of its
  13  * contributors may be used to endorse or promote products derived from
  14  * this software without specific prior written permission.
  15  *
  16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  17  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  18  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  19  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  20  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  21  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  22  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  26  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  27  *
  28  * Authors: Steve Reinhardt
  29  */
  30
  31 #include "arch/locked_mem.hh"
  32 #include "arch/mmaped_ipr.hh"
  33 #include "arch/utility.hh"
  34 #include "base/bigint.hh"
  35 #include "cpu/exetrace.hh"
  36 #include "cpu/simple/atomic.hh"
  37 #include "mem/packet.hh"
  38 #include "mem/packet_access.hh"
  39 #include "params/AtomicSimpleCPU.hh"
  40 #include "sim/system.hh"
  41
  42 using namespace std;
  43 using namespace TheISA;
  44
  45 AtomicSimpleCPU::TickEvent::TickEvent(AtomicSimpleCPU *c)
  46     : Event(&mainEventQueue, CPU_Tick_Pri), cpu(c)
  47 {
  48 }
  49
  50
  51 void
  52 AtomicSimpleCPU::TickEvent::process()
  53 {
  54     cpu->tick();
  55 }
  56
  57 const char *
  58 AtomicSimpleCPU::TickEvent::description()
  59 {
  60     return "AtomicSimpleCPU tick";
  61 }
  62
  63 Port *
  64 AtomicSimpleCPU::getPort(const std::string &if_name, int idx)
  65 {
  66     if (if_name == "dcache_port")
  67         return &dcachePort;
  68     else if (if_name == "icache_port")
  69         return &icachePort;
  70     else if (if_name == "physmem_port") {
  71         hasPhysMemPort = true;
  72         return &physmemPort;
  73     }
  74     else
  75         panic("No Such Port\n");
  76 }
  77
  78 void
  79 AtomicSimpleCPU::init()
  80 {
  81     BaseCPU::init();
  82 #if FULL_SYSTEM
  83     for (int i = 0; i < threadContexts.size(); ++i) {
  84         ThreadContext *tc = threadContexts[i];
  85
  86         // initialize CPU, including PC
  87         TheISA::initCPU(tc, tc->readCpuId());
  88     }
  89 #endif
  90     if (hasPhysMemPort) {
  91         bool snoop = false;
  92         AddrRangeList pmAddrList;
  93         physmemPort.getPeerAddressRanges(pmAddrList, snoop);
  94         physMemAddr = *pmAddrList.begin();
  95     }
  96 }
  97
  98 bool
  99 AtomicSimpleCPU::CpuPort::recvTiming(PacketPtr pkt)
 100 {
 101     panic("AtomicSimpleCPU doesn't expect recvTiming callback!");
 102     return true;
 103 }
 104
 105 Tick
 106 AtomicSimpleCPU::CpuPort::recvAtomic(PacketPtr pkt)
 107 {
 108     //Snooping a coherence request, just return
 109     return 0;
 110 }
 111
 112 void
 113 AtomicSimpleCPU::CpuPort::recvFunctional(PacketPtr pkt)
 114 {
 115     //No internal storage to update, just return
 116     return;
 117 }
 118
 119 void
 120 AtomicSimpleCPU::CpuPort::recvStatusChange(Status status)
 121 {
 122     if (status == RangeChange) {
 123         if (!snoopRangeSent) {
 124             snoopRangeSent = true;
 125             sendStatusChange(Port::RangeChange);
 126         }
 127         return;
 128     }
 129
 130     panic("AtomicSimpleCPU doesn't expect recvStatusChange callback!");
 131 }
 132
 133 void
 134 AtomicSimpleCPU::CpuPort::recvRetry()
 135 {
 136     panic("AtomicSimpleCPU doesn't expect recvRetry callback!");
 137 }
 138
 139 void
 140 AtomicSimpleCPU::DcachePort::setPeer(Port *port)
 141 {
 142     Port::setPeer(port);
 143
 144 #if FULL_SYSTEM
 145     // Update the ThreadContext's memory ports (Functional/Virtual
 146     // Ports)
 147     cpu->tcBase()->connectMemPorts();
 148 #endif
 149 }
 150
 151 AtomicSimpleCPU::AtomicSimpleCPU(Params *p)
 152     : BaseSimpleCPU(p), tickEvent(this),
 153       width(p->width), simulate_stalls(p->simulate_stalls),
 154       icachePort(name() + "-iport", this), dcachePort(name() + "-iport", this),
 155       physmemPort(name() + "-iport", this), hasPhysMemPort(false)
 156 {
 157     _status = Idle;
 158
 159     icachePort.snoopRangeSent = false;
 160     dcachePort.snoopRangeSent = false;
 161
 162     ifetch_req.setThreadContext(p->cpu_id, 0); // Add thread ID if we add MT
 163     data_read_req.setThreadContext(p->cpu_id, 0); // Add thread ID here too
 164     data_write_req.setThreadContext(p->cpu_id, 0); // Add thread ID here too
 165 }
 166
 167
 168 AtomicSimpleCPU::~AtomicSimpleCPU()
 169 {
 170 }
 171
 172 void
 173 AtomicSimpleCPU::serialize(ostream &os)
 174 {
 175     SimObject::State so_state = SimObject::getState();
 176     SERIALIZE_ENUM(so_state);
 177     Status _status = status();
 178     SERIALIZE_ENUM(_status);
 179     BaseSimpleCPU::serialize(os);
 180     nameOut(os, csprintf("%s.tickEvent", name()));
 181     tickEvent.serialize(os);
 182 }
 183
 184 void
 185 AtomicSimpleCPU::unserialize(Checkpoint *cp, const string &section)
 186 {
 187     SimObject::State so_state;
 188     UNSERIALIZE_ENUM(so_state);
 189     UNSERIALIZE_ENUM(_status);
 190     BaseSimpleCPU::unserialize(cp, section);
 191     tickEvent.unserialize(cp, csprintf("%s.tickEvent", section));
 192 }
 193
 194 void
 195 AtomicSimpleCPU::resume()
 196 {
 197     DPRINTF(SimpleCPU, "Resume\n");
 198     if (_status != SwitchedOut && _status != Idle) {
 199         assert(system->getMemoryMode() == Enums::atomic);
 200
 201         changeState(SimObject::Running);
 202         if (thread->status() == ThreadContext::Active) {
 203             if (!tickEvent.scheduled()) {
 204                 tickEvent.schedule(nextCycle());
 205             }
 206         }
 207     }
 208 }
 209
 210 void
 211 AtomicSimpleCPU::switchOut()
 212 {
 213     assert(status() == Running || status() == Idle);
 214     _status = SwitchedOut;
 215
 216     tickEvent.squash();
 217 }
 218
 219
 220 void
 221 AtomicSimpleCPU::takeOverFrom(BaseCPU *oldCPU)
 222 {
 223     BaseCPU::takeOverFrom(oldCPU, &icachePort, &dcachePort);
 224
 225     assert(!tickEvent.scheduled());
 226
 227     // if any of this CPU's ThreadContexts are active, mark the CPU as
 228     // running and schedule its tick event.
 229     for (int i = 0; i < threadContexts.size(); ++i) {
 230         ThreadContext *tc = threadContexts[i];
 231         if (tc->status() == ThreadContext::Active && _status != Running) {
 232             _status = Running;
 233             tickEvent.schedule(nextCycle());
 234             break;
 235         }
 236     }
 237     if (_status != Running) {
 238         _status = Idle;
 239     }
 240 }
 241
 242
 243 void
 244 AtomicSimpleCPU::activateContext(int thread_num, int delay)
 245 {
 246     DPRINTF(SimpleCPU, "ActivateContext %d (%d cycles)\n", thread_num, delay);
 247
 248     assert(thread_num == 0);
 249     assert(thread);
 250
 251     assert(_status == Idle);
 252     assert(!tickEvent.scheduled());
 253
 254     notIdleFraction++;
 255     numCycles += tickToCycles(thread->lastActivate - thread->lastSuspend);
 256
 257     //Make sure ticks are still on multiples of cycles
 258     tickEvent.schedule(nextCycle(curTick + ticks(delay)));
 259     _status = Running;
 260 }
 261
 262
 263 void
 264 AtomicSimpleCPU::suspendContext(int thread_num)
 265 {
 266     DPRINTF(SimpleCPU, "SuspendContext %d\n", thread_num);
 267
 268     assert(thread_num == 0);
 269     assert(thread);
 270
 271     assert(_status == Running);
 272
 273     // tick event may not be scheduled if this gets called from inside
 274     // an instruction's execution, e.g. "quiesce"
 275     if (tickEvent.scheduled())
 276         tickEvent.deschedule();
 277
 278     notIdleFraction--;
 279     _status = Idle;
 280 }
 281
 282
 283 template <class T>
 284 Fault
 285 AtomicSimpleCPU::read(Addr addr, T &data, unsigned flags)
 286 {
 287     // use the CPU's statically allocated read request and packet objects
 288     Request *req = &data_read_req;
 289
 290     if (traceData) {
 291         traceData->setAddr(addr);
 292     }
 293
 294     //The block size of our peer.
 295     int blockSize = dcachePort.peerBlockSize();
 296     //The size of the data we're trying to read.
 297     int dataSize = sizeof(T);
 298
 299     uint8_t * dataPtr = (uint8_t *)&data;
 300
 301     //The address of the second part of this access if it needs to be split
 302     //across a cache line boundary.
 303     Addr secondAddr = roundDown(addr + dataSize - 1, blockSize);
 304
 305     if(secondAddr > addr)
 306         dataSize = secondAddr - addr;
 307
 308     dcache_latency = 0;
 309
 310     while(1) {
 311         req->setVirt(0, addr, dataSize, flags, thread->readPC());
 312
 313         // translate to physical address
 314         Fault fault = thread->translateDataReadReq(req);
 315
 316         // Now do the access.
 317         if (fault == NoFault) {
 318             Packet pkt = Packet(req,
 319                     req->isLocked() ? MemCmd::LoadLockedReq : MemCmd::ReadReq,
 320                     Packet::Broadcast);
 321             pkt.dataStatic(dataPtr);
 322
 323             if (req->isMmapedIpr())
 324                 dcache_latency += TheISA::handleIprRead(thread->getTC(), &pkt);
 325             else {
 326                 if (hasPhysMemPort && pkt.getAddr() == physMemAddr)
 327                     dcache_latency += physmemPort.sendAtomic(&pkt);
 328                 else
 329                     dcache_latency += dcachePort.sendAtomic(&pkt);
 330             }
 331             dcache_access = true;
 332
 333             assert(!pkt.isError());
 334
 335             if (req->isLocked()) {
 336                 TheISA::handleLockedRead(thread, req);
 337             }
 338         }
 339
 340         // This will need a new way to tell if it has a dcache attached.
 341         if (req->isUncacheable())
 342             recordEvent("Uncached Read");
 343
 344         //If there's a fault, return it
 345         if (fault != NoFault)
 346             return fault;
 347         //If we don't need to access a second cache line, stop now.
 348         if (secondAddr <= addr)
 349         {
 350             data = gtoh(data);
 351             return fault;
 352         }
 353
 354         /*
 355          * Set up for accessing the second cache line.
 356          */
 357
 358         //Move the pointer we're reading into to the correct location.
 359         dataPtr += dataSize;
 360         //Adjust the size to get the remaining bytes.
 361         dataSize = addr + sizeof(T) - secondAddr;
 362         //And access the right address.
 363         addr = secondAddr;
 364     }
 365 }
 366
 367 #ifndef DOXYGEN_SHOULD_SKIP_THIS
 368
 369 template
 370 Fault
 371 AtomicSimpleCPU::read(Addr addr, Twin32_t &data, unsigned flags);
 372
 373 template
 374 Fault
 375 AtomicSimpleCPU::read(Addr addr, Twin64_t &data, unsigned flags);
 376
 377 template
 378 Fault
 379 AtomicSimpleCPU::read(Addr addr, uint64_t &data, unsigned flags);
 380
 381 template
 382 Fault
 383 AtomicSimpleCPU::read(Addr addr, uint32_t &data, unsigned flags);
 384
 385 template
 386 Fault
 387 AtomicSimpleCPU::read(Addr addr, uint16_t &data, unsigned flags);
 388
 389 template
 390 Fault
 391 AtomicSimpleCPU::read(Addr addr, uint8_t &data, unsigned flags);
 392
 393 #endif //DOXYGEN_SHOULD_SKIP_THIS
 394
 395 template<>
 396 Fault
 397 AtomicSimpleCPU::read(Addr addr, double &data, unsigned flags)
 398 {
 399     return read(addr, *(uint64_t*)&data, flags);
 400 }
 401
 402 template<>
 403 Fault
 404 AtomicSimpleCPU::read(Addr addr, float &data, unsigned flags)
 405 {
 406     return read(addr, *(uint32_t*)&data, flags);
 407 }
 408
 409
 410 template<>
 411 Fault
 412 AtomicSimpleCPU::read(Addr addr, int32_t &data, unsigned flags)
 413 {
 414     return read(addr, (uint32_t&)data, flags);
 415 }
 416
 417
 418 template <class T>
 419 Fault
 420 AtomicSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res)
 421 {
 422     // use the CPU's statically allocated write request and packet objects
 423     Request *req = &data_write_req;
 424
 425     if (traceData) {
 426         traceData->setAddr(addr);
 427     }
 428
 429     //The block size of our peer.
 430     int blockSize = dcachePort.peerBlockSize();
 431     //The size of the data we're trying to read.
 432     int dataSize = sizeof(T);
 433
 434     uint8_t * dataPtr = (uint8_t *)&data;
 435
 436     //The address of the second part of this access if it needs to be split
 437     //across a cache line boundary.
 438     Addr secondAddr = roundDown(addr + dataSize - 1, blockSize);
 439
 440     if(secondAddr > addr)
 441         dataSize = secondAddr - addr;
 442
 443     dcache_latency = 0;
 444
 445     while(1) {
 446         req->setVirt(0, addr, dataSize, flags, thread->readPC());
 447
 448         // translate to physical address
 449         Fault fault = thread->translateDataWriteReq(req);
 450
 451         // Now do the access.
 452         if (fault == NoFault) {
 453             MemCmd cmd = MemCmd::WriteReq; // default
 454             bool do_access = true;  // flag to suppress cache access
 455
 456             if (req->isLocked()) {
 457                 cmd = MemCmd::StoreCondReq;
 458                 do_access = TheISA::handleLockedWrite(thread, req);
 459             } else if (req->isSwap()) {
 460                 cmd = MemCmd::SwapReq;
 461                 if (req->isCondSwap()) {
 462                     assert(res);
 463                     req->setExtraData(*res);
 464                 }
 465             }
 466
 467             if (do_access) {
 468                 Packet pkt = Packet(req, cmd, Packet::Broadcast);
 469                 pkt.dataStatic(dataPtr);
 470
 471                 if (req->isMmapedIpr()) {
 472                     dcache_latency +=
 473                         TheISA::handleIprWrite(thread->getTC(), &pkt);
 474                 } else {
 475                     //XXX This needs to be outside of the loop in order to
 476                     //work properly for cache line boundary crossing
 477                     //accesses in transendian simulations.
 478                     data = htog(data);
 479                     if (hasPhysMemPort && pkt.getAddr() == physMemAddr)
 480                         dcache_latency += physmemPort.sendAtomic(&pkt);
 481                     else
 482                         dcache_latency += dcachePort.sendAtomic(&pkt);
 483                 }
 484                 dcache_access = true;
 485                 assert(!pkt.isError());
 486
 487                 if (req->isSwap()) {
 488                     assert(res);
 489                     *res = pkt.get<T>();
 490                 }
 491             }
 492
 493             if (res && !req->isSwap()) {
 494                 *res = req->getExtraData();
 495             }
 496         }
 497
 498         // This will need a new way to tell if it's hooked up to a cache or not.
 499         if (req->isUncacheable())
 500             recordEvent("Uncached Write");
 501
 502         //If there's a fault or we don't need to access a second cache line,
 503         //stop now.
 504         if (fault != NoFault || secondAddr <= addr)
 505         {
 506             // If the write needs to have a fault on the access, consider
 507             // calling changeStatus() and changing it to "bad addr write"
 508             // or something.
 509             return fault;
 510         }
 511
 512         /*
 513          * Set up for accessing the second cache line.
 514          */
 515
 516         //Move the pointer we're reading into to the correct location.
 517         dataPtr += dataSize;
 518         //Adjust the size to get the remaining bytes.
 519         dataSize = addr + sizeof(T) - secondAddr;
 520         //And access the right address.
 521         addr = secondAddr;
 522     }
 523 }
 524
 525
 526 #ifndef DOXYGEN_SHOULD_SKIP_THIS
 527
 528 template
 529 Fault
 530 AtomicSimpleCPU::write(Twin32_t data, Addr addr,
 531                        unsigned flags, uint64_t *res);
 532
 533 template
 534 Fault
 535 AtomicSimpleCPU::write(Twin64_t data, Addr addr,
 536                        unsigned flags, uint64_t *res);
 537
 538 template
 539 Fault
 540 AtomicSimpleCPU::write(uint64_t data, Addr addr,
 541                        unsigned flags, uint64_t *res);
 542
 543 template
 544 Fault
 545 AtomicSimpleCPU::write(uint32_t data, Addr addr,
 546                        unsigned flags, uint64_t *res);
 547
 548 template
 549 Fault
 550 AtomicSimpleCPU::write(uint16_t data, Addr addr,
 551                        unsigned flags, uint64_t *res);
 552
 553 template
 554 Fault
 555 AtomicSimpleCPU::write(uint8_t data, Addr addr,
 556                        unsigned flags, uint64_t *res);
 557
 558 #endif //DOXYGEN_SHOULD_SKIP_THIS
 559
 560 template<>
 561 Fault
 562 AtomicSimpleCPU::write(double data, Addr addr, unsigned flags, uint64_t *res)
 563 {
 564     return write(*(uint64_t*)&data, addr, flags, res);
 565 }
 566
 567 template<>
 568 Fault
 569 AtomicSimpleCPU::write(float data, Addr addr, unsigned flags, uint64_t *res)
 570 {
 571     return write(*(uint32_t*)&data, addr, flags, res);
 572 }
 573
 574
 575 template<>
 576 Fault
 577 AtomicSimpleCPU::write(int32_t data, Addr addr, unsigned flags, uint64_t *res)
 578 {
 579     return write((uint32_t)data, addr, flags, res);
 580 }
 581
 582
 583 void
 584 AtomicSimpleCPU::tick()
 585 {
 586     DPRINTF(SimpleCPU, "Tick\n");
 587
 588     Tick latency = ticks(1); // instruction takes one cycle by default
 589
 590     for (int i = 0; i < width; ++i) {
 591         numCycles++;
 592
 593         if (!curStaticInst || !curStaticInst->isDelayedCommit())
 594             checkForInterrupts();
 595
 596         Fault fault = setupFetchRequest(&ifetch_req);
 597
 598         if (fault == NoFault) {
 599             Tick icache_latency = 0;
 600             bool icache_access = false;
 601             dcache_access = false; // assume no dcache access
 602
 603             //Fetch more instruction memory if necessary
 604             //if(predecoder.needMoreBytes())
 605             //{
 606                 icache_access = true;
 607                 Packet ifetch_pkt = Packet(&ifetch_req, MemCmd::ReadReq,
 608                                            Packet::Broadcast);
 609                 ifetch_pkt.dataStatic(&inst);
 610
 611                 if (hasPhysMemPort && ifetch_pkt.getAddr() == physMemAddr)
 612                     icache_latency = physmemPort.sendAtomic(&ifetch_pkt);
 613                 else
 614                     icache_latency = icachePort.sendAtomic(&ifetch_pkt);
 615
 616                 assert(!ifetch_pkt.isError());
 617
 618                 // ifetch_req is initialized to read the instruction directly
 619                 // into the CPU object's inst field.
 620             //}
 621
 622             preExecute();
 623
 624             if (curStaticInst) {
 625                 fault = curStaticInst->execute(this, traceData);
 626
 627                 // keep an instruction count
 628                 if (fault == NoFault)
 629                     countInst();
 630                 else if (traceData) {
 631                     // If there was a fault, we should trace this instruction.
 632                     delete traceData;
 633                     traceData = NULL;
 634                 }
 635
 636                 postExecute();
 637             }
 638
 639             // @todo remove me after debugging with legion done
 640             if (curStaticInst && (!curStaticInst->isMicroop() ||
 641                         curStaticInst->isFirstMicroop()))
 642                 instCnt++;
 643
 644             if (simulate_stalls) {
 645                 Tick icache_stall =
 646                     icache_access ? icache_latency - ticks(1) : 0;
 647                 Tick dcache_stall =
 648                     dcache_access ? dcache_latency - ticks(1) : 0;
 649                 Tick stall_cycles = (icache_stall + dcache_stall) / ticks(1);
 650                 if (ticks(stall_cycles) < (icache_stall + dcache_stall))
 651                     latency += ticks(stall_cycles+1);
 652                 else
 653                     latency += ticks(stall_cycles);
 654             }
 655
 656         }
 657         if(fault != NoFault || !stayAtPC)
 658             advancePC(fault);
 659     }
 660
 661     if (_status != Idle)
 662         tickEvent.schedule(curTick + latency);
 663 }
 664
 665
 666 ////////////////////////////////////////////////////////////////////////
 667 //
 668 //  AtomicSimpleCPU Simulation Object
 669 //
 670 AtomicSimpleCPU *
 671 AtomicSimpleCPUParams::create()
 672 {
 673     AtomicSimpleCPU::Params *params = new AtomicSimpleCPU::Params();
 674     params->name = name;
 675     params->numberOfThreads = 1;
 676     params->max_insts_any_thread = max_insts_any_thread;
 677     params->max_insts_all_threads = max_insts_all_threads;
 678     params->max_loads_any_thread = max_loads_any_thread;
 679     params->max_loads_all_threads = max_loads_all_threads;
 680     params->progress_interval = progress_interval;
 681     params->deferRegistration = defer_registration;
 682     params->phase = phase;
 683     params->clock = clock;
 684     params->functionTrace = function_trace;
 685     params->functionTraceStart = function_trace_start;
 686     params->width = width;
 687     params->simulate_stalls = simulate_stalls;
 688     params->system = system;
 689     params->cpu_id = cpu_id;
 690     params->tracer = tracer;
 691
 692     params->itb = itb;
 693     params->dtb = dtb;
 694 #if FULL_SYSTEM
 695     params->profile = profile;
 696     params->do_quiesce = do_quiesce;
 697     params->do_checkpoint_insts = do_checkpoint_insts;
 698     params->do_statistics_insts = do_statistics_insts;
 699 #else
 700     if (workload.size() != 1)
 701         panic("only one workload allowed");
 702     params->process = workload[0];
 703 #endif
 704
 705     AtomicSimpleCPU *cpu = new AtomicSimpleCPU(params);
 706     return cpu;
 707 }