/*
- * Copyright (c) 2012 ARM Limited
+ * Copyright 2014 Google, Inc.
+ * Copyright (c) 2012-2013,2015 ARM Limited
* All rights reserved.
*
* The license below extends only to copyright in the software and shall
#include "arch/mmapped_ipr.hh"
#include "arch/utility.hh"
#include "base/bigint.hh"
+#include "base/output.hh"
#include "config/the_isa.hh"
#include "cpu/simple/atomic.hh"
#include "cpu/exetrace.hh"
+#include "debug/Drain.hh"
#include "debug/ExecFaulting.hh"
#include "debug/SimpleCPU.hh"
#include "mem/packet.hh"
void
AtomicSimpleCPU::init()
{
- BaseCPU::init();
+ BaseSimpleCPU::init();
- // Initialise the ThreadContext's memory proxies
- tcBase()->initMemProxies(tcBase());
-
- if (FullSystem && !params()->defer_registration) {
- ThreadID size = threadContexts.size();
- for (ThreadID i = 0; i < size; ++i) {
- ThreadContext *tc = threadContexts[i];
- // initialize CPU, including PC
- TheISA::initCPU(tc, tc->contextId());
- }
- }
-
- // Atomic doesn't do MT right now, so contextId == threadId
- ifetch_req.setThreadContext(_cpuId, 0); // Add thread ID if we add MT
- data_read_req.setThreadContext(_cpuId, 0); // Add thread ID here too
- data_write_req.setThreadContext(_cpuId, 0); // Add thread ID here too
+ int cid = threadContexts[0]->contextId();
+ ifetch_req.setThreadContext(cid, 0);
+ data_read_req.setThreadContext(cid, 0);
+ data_write_req.setThreadContext(cid, 0);
}
AtomicSimpleCPU::AtomicSimpleCPU(AtomicSimpleCPUParams *p)
simulate_inst_stalls(p->simulate_inst_stalls),
icachePort(name() + ".icache_port", this),
dcachePort(name() + ".dcache_port", this),
- fastmem(p->fastmem)
+ fastmem(p->fastmem), dcache_access(false), dcache_latency(0),
+ ppCommit(nullptr)
{
_status = Idle;
}
}
}
-void
-AtomicSimpleCPU::serialize(ostream &os)
+DrainState
+AtomicSimpleCPU::drain()
{
- Drainable::State so_state(getDrainState());
- SERIALIZE_ENUM(so_state);
- SERIALIZE_SCALAR(locked);
- BaseSimpleCPU::serialize(os);
- nameOut(os, csprintf("%s.tickEvent", name()));
- tickEvent.serialize(os);
+ if (switchedOut())
+ return DrainState::Drained;
+
+ if (!isDrained()) {
+ DPRINTF(Drain, "Requesting drain.\n");
+ return DrainState::Draining;
+ } else {
+ if (tickEvent.scheduled())
+ deschedule(tickEvent);
+
+ activeThreads.clear();
+ DPRINTF(Drain, "Not executing microcode, no need to drain.\n");
+ return DrainState::Drained;
+ }
}
void
-AtomicSimpleCPU::unserialize(Checkpoint *cp, const string §ion)
+AtomicSimpleCPU::threadSnoop(PacketPtr pkt, ThreadID sender)
{
- Drainable::State so_state;
- UNSERIALIZE_ENUM(so_state);
- UNSERIALIZE_SCALAR(locked);
- BaseSimpleCPU::unserialize(cp, section);
- tickEvent.unserialize(cp, csprintf("%s.tickEvent", section));
-}
+ DPRINTF(SimpleCPU, "received snoop pkt for addr:%#x %s\n", pkt->getAddr(),
+ pkt->cmdString());
-unsigned int
-AtomicSimpleCPU::drain(DrainManager *drain_manager)
-{
- setDrainState(Drainable::Drained);
- return 0;
+ for (ThreadID tid = 0; tid < numThreads; tid++) {
+ if (tid != sender) {
+ if (getCpuAddrMonitor(tid)->doMonitor(pkt)) {
+ wakeup(tid);
+ }
+
+ TheISA::handleLockedSnoop(threadInfo[tid]->thread,
+ pkt, dcachePort.cacheBlockMask);
+ }
+ }
}
void
AtomicSimpleCPU::drainResume()
{
- if (_status == Idle || _status == SwitchedOut)
+ assert(!tickEvent.scheduled());
+ if (switchedOut())
return;
DPRINTF(SimpleCPU, "Resume\n");
- assert(system->getMemoryMode() == Enums::atomic);
+ verifyMemoryMode();
+
+ assert(!threadContexts.empty());
+
+ _status = BaseSimpleCPU::Idle;
- setDrainState(Drainable::Running);
- if (thread->status() == ThreadContext::Active) {
- if (!tickEvent.scheduled())
- schedule(tickEvent, nextCycle());
+ for (ThreadID tid = 0; tid < numThreads; tid++) {
+ if (threadInfo[tid]->thread->status() == ThreadContext::Active) {
+ threadInfo[tid]->notIdleFraction = 1;
+ activeThreads.push_back(tid);
+ _status = BaseSimpleCPU::Running;
+
+ // Tick if any threads active
+ if (!tickEvent.scheduled()) {
+ schedule(tickEvent, nextCycle());
+ }
+ } else {
+ threadInfo[tid]->notIdleFraction = 0;
+ }
}
- system->totalNumInsts = 0;
}
+bool
+AtomicSimpleCPU::tryCompleteDrain()
+{
+ if (drainState() != DrainState::Draining)
+ return false;
+
+ DPRINTF(Drain, "tryCompleteDrain.\n");
+ if (!isDrained())
+ return false;
+
+ DPRINTF(Drain, "CPU done draining, processing drain event\n");
+ signalDrainDone();
+
+ return true;
+}
+
+
void
AtomicSimpleCPU::switchOut()
{
- assert(_status == BaseSimpleCPU::Running || _status == Idle);
- _status = SwitchedOut;
+ BaseSimpleCPU::switchOut();
- tickEvent.squash();
+ assert(!tickEvent.scheduled());
+ assert(_status == BaseSimpleCPU::Running || _status == Idle);
+ assert(isDrained());
}
void
AtomicSimpleCPU::takeOverFrom(BaseCPU *oldCPU)
{
- BaseCPU::takeOverFrom(oldCPU);
+ BaseSimpleCPU::takeOverFrom(oldCPU);
+ // The tick event should have been descheduled by drain()
assert(!tickEvent.scheduled());
+}
- // if any of this CPU's ThreadContexts are active, mark the CPU as
- // running and schedule its tick event.
- ThreadID size = threadContexts.size();
- for (ThreadID i = 0; i < size; ++i) {
- ThreadContext *tc = threadContexts[i];
- if (tc->status() == ThreadContext::Active &&
- _status != BaseSimpleCPU::Running) {
- _status = BaseSimpleCPU::Running;
- schedule(tickEvent, nextCycle());
- break;
- }
- }
- if (_status != BaseSimpleCPU::Running) {
- _status = Idle;
+void
+AtomicSimpleCPU::verifyMemoryMode() const
+{
+ if (!system->isAtomicMode()) {
+ fatal("The atomic CPU requires the memory system to be in "
+ "'atomic' mode.\n");
}
- assert(threadContexts.size() == 1);
- ifetch_req.setThreadContext(_cpuId, 0); // Add thread ID if we add MT
- data_read_req.setThreadContext(_cpuId, 0); // Add thread ID here too
- data_write_req.setThreadContext(_cpuId, 0); // Add thread ID here too
}
-
void
-AtomicSimpleCPU::activateContext(ThreadID thread_num, Cycles delay)
+AtomicSimpleCPU::activateContext(ThreadID thread_num)
{
- DPRINTF(SimpleCPU, "ActivateContext %d (%d cycles)\n", thread_num, delay);
+ DPRINTF(SimpleCPU, "ActivateContext %d\n", thread_num);
- assert(thread_num == 0);
- assert(thread);
+ assert(thread_num < numThreads);
- assert(_status == Idle);
- assert(!tickEvent.scheduled());
-
- notIdleFraction++;
- numCycles += ticksToCycles(thread->lastActivate - thread->lastSuspend);
+ threadInfo[thread_num]->notIdleFraction = 1;
+ Cycles delta = ticksToCycles(threadInfo[thread_num]->thread->lastActivate -
+ threadInfo[thread_num]->thread->lastSuspend);
+ numCycles += delta;
+ ppCycles->notify(delta);
- //Make sure ticks are still on multiples of cycles
- schedule(tickEvent, clockEdge(delay));
+ if (!tickEvent.scheduled()) {
+ //Make sure ticks are still on multiples of cycles
+ schedule(tickEvent, clockEdge(Cycles(0)));
+ }
_status = BaseSimpleCPU::Running;
+ if (std::find(activeThreads.begin(), activeThreads.end(), thread_num)
+ == activeThreads.end()) {
+ activeThreads.push_back(thread_num);
+ }
}
{
DPRINTF(SimpleCPU, "SuspendContext %d\n", thread_num);
- assert(thread_num == 0);
- assert(thread);
+ assert(thread_num < numThreads);
+ activeThreads.remove(thread_num);
if (_status == Idle)
return;
assert(_status == BaseSimpleCPU::Running);
- // tick event may not be scheduled if this gets called from inside
- // an instruction's execution, e.g. "quiesce"
- if (tickEvent.scheduled())
- deschedule(tickEvent);
+ threadInfo[thread_num]->notIdleFraction = 0;
+
+ if (activeThreads.empty()) {
+ _status = Idle;
+
+ if (tickEvent.scheduled()) {
+ deschedule(tickEvent);
+ }
+ }
- notIdleFraction--;
- _status = Idle;
}
+Tick
+AtomicSimpleCPU::AtomicCPUDPort::recvAtomicSnoop(PacketPtr pkt)
+{
+ DPRINTF(SimpleCPU, "received snoop pkt for addr:%#x %s\n", pkt->getAddr(),
+ pkt->cmdString());
+
+ // X86 ISA: Snooping an invalidation for monitor/mwait
+ AtomicSimpleCPU *cpu = (AtomicSimpleCPU *)(&owner);
+
+ for (ThreadID tid = 0; tid < cpu->numThreads; tid++) {
+ if (cpu->getCpuAddrMonitor(tid)->doMonitor(pkt)) {
+ cpu->wakeup(tid);
+ }
+ }
+
+ // if snoop invalidates, release any associated locks
+ if (pkt->isInvalidate()) {
+ DPRINTF(SimpleCPU, "received invalidation for addr:%#x\n",
+ pkt->getAddr());
+ for (auto &t_info : cpu->threadInfo) {
+ TheISA::handleLockedSnoop(t_info->thread, pkt, cacheBlockMask);
+ }
+ }
+
+ return 0;
+}
+
+void
+AtomicSimpleCPU::AtomicCPUDPort::recvFunctionalSnoop(PacketPtr pkt)
+{
+ DPRINTF(SimpleCPU, "received snoop pkt for addr:%#x %s\n", pkt->getAddr(),
+ pkt->cmdString());
+
+ // X86 ISA: Snooping an invalidation for monitor/mwait
+ AtomicSimpleCPU *cpu = (AtomicSimpleCPU *)(&owner);
+ for (ThreadID tid = 0; tid < cpu->numThreads; tid++) {
+ if (cpu->getCpuAddrMonitor(tid)->doMonitor(pkt)) {
+ cpu->wakeup(tid);
+ }
+ }
+
+ // if snoop invalidates, release any associated locks
+ if (pkt->isInvalidate()) {
+ DPRINTF(SimpleCPU, "received invalidation for addr:%#x\n",
+ pkt->getAddr());
+ for (auto &t_info : cpu->threadInfo) {
+ TheISA::handleLockedSnoop(t_info->thread, pkt, cacheBlockMask);
+ }
+ }
+}
+
Fault
AtomicSimpleCPU::readMem(Addr addr, uint8_t * data,
unsigned size, unsigned flags)
{
+ SimpleExecContext& t_info = *threadInfo[curThread];
+ SimpleThread* thread = t_info.thread;
+
// use the CPU's statically allocated read request and packet objects
Request *req = &data_read_req;
- if (traceData) {
- traceData->setAddr(addr);
- }
+ if (traceData)
+ traceData->setMem(addr, size, flags);
- //The block size of our peer.
- unsigned blockSize = dcachePort.peerBlockSize();
//The size of the data we're trying to read.
int fullSize = size;
//The address of the second part of this access if it needs to be split
//across a cache line boundary.
- Addr secondAddr = roundDown(addr + size - 1, blockSize);
+ Addr secondAddr = roundDown(addr + size - 1, cacheLineSize());
if (secondAddr > addr)
size = secondAddr - addr;
dcache_latency = 0;
+ req->taskId(taskId());
while (1) {
req->setVirt(0, addr, size, flags, dataMasterId(), thread->pcState().instAddr());
// translate to physical address
- Fault fault = thread->dtb->translateAtomic(req, tc, BaseTLB::Read);
+ Fault fault = thread->dtb->translateAtomic(req, thread->getTC(),
+ BaseTLB::Read);
// Now do the access.
if (fault == NoFault && !req->getFlags().isSet(Request::NO_ACCESS)) {
- Packet pkt = Packet(req,
- req->isLLSC() ? MemCmd::LoadLockedReq :
- MemCmd::ReadReq);
+ Packet pkt(req, Packet::makeReadCmd(req));
pkt.dataStatic(data);
if (req->isMmappedIpr())
//If we don't need to access a second cache line, stop now.
if (secondAddr <= addr)
{
- if (req->isLocked() && fault == NoFault) {
+ if (req->isLockedRMW() && fault == NoFault) {
assert(!locked);
locked = true;
}
+
return fault;
}
}
}
+Fault
+AtomicSimpleCPU::initiateMemRead(Addr addr, unsigned size, unsigned flags)
+{
+ panic("initiateMemRead() is for timing accesses, and should "
+ "never be called on AtomicSimpleCPU.\n");
+}
Fault
AtomicSimpleCPU::writeMem(uint8_t *data, unsigned size,
Addr addr, unsigned flags, uint64_t *res)
{
+ SimpleExecContext& t_info = *threadInfo[curThread];
+ SimpleThread* thread = t_info.thread;
+ static uint8_t zero_array[64] = {};
+
+ if (data == NULL) {
+ assert(size <= 64);
+ assert(flags & Request::CACHE_BLOCK_ZERO);
+ // This must be a cache block cleaning request
+ data = zero_array;
+ }
+
// use the CPU's statically allocated write request and packet objects
Request *req = &data_write_req;
- if (traceData) {
- traceData->setAddr(addr);
- }
+ if (traceData)
+ traceData->setMem(addr, size, flags);
- //The block size of our peer.
- unsigned blockSize = dcachePort.peerBlockSize();
//The size of the data we're trying to read.
int fullSize = size;
//The address of the second part of this access if it needs to be split
//across a cache line boundary.
- Addr secondAddr = roundDown(addr + size - 1, blockSize);
+ Addr secondAddr = roundDown(addr + size - 1, cacheLineSize());
- if(secondAddr > addr)
+ if (secondAddr > addr)
size = secondAddr - addr;
dcache_latency = 0;
- while(1) {
+ req->taskId(taskId());
+ while (1) {
req->setVirt(0, addr, size, flags, dataMasterId(), thread->pcState().instAddr());
// translate to physical address
- Fault fault = thread->dtb->translateAtomic(req, tc, BaseTLB::Write);
+ Fault fault = thread->dtb->translateAtomic(req, thread->getTC(), BaseTLB::Write);
// Now do the access.
if (fault == NoFault) {
if (req->isLLSC()) {
cmd = MemCmd::StoreCondReq;
- do_access = TheISA::handleLockedWrite(thread, req);
+ do_access = TheISA::handleLockedWrite(thread, req, dcachePort.cacheBlockMask);
} else if (req->isSwap()) {
cmd = MemCmd::SwapReq;
if (req->isCondSwap()) {
system->getPhysMem().access(&pkt);
else
dcache_latency += dcachePort.sendAtomic(&pkt);
+
+ // Notify other threads on this CPU of write
+ threadSnoop(&pkt, curThread);
}
dcache_access = true;
assert(!pkt.isError());
if (req->isSwap()) {
assert(res);
- memcpy(res, pkt.getPtr<uint8_t>(), fullSize);
+ memcpy(res, pkt.getConstPtr<uint8_t>(), fullSize);
}
}
//stop now.
if (fault != NoFault || secondAddr <= addr)
{
- if (req->isLocked() && fault == NoFault) {
+ if (req->isLockedRMW() && fault == NoFault) {
assert(locked);
locked = false;
}
+
+
if (fault != NoFault && req->isPrefetch()) {
return NoFault;
} else {
{
DPRINTF(SimpleCPU, "Tick\n");
+ // Change thread if multi-threaded
+ swapActiveThread();
+
+ // Set memroy request ids to current thread
+ if (numThreads > 1) {
+ ContextID cid = threadContexts[curThread]->contextId();
+
+ ifetch_req.setThreadContext(cid, curThread);
+ data_read_req.setThreadContext(cid, curThread);
+ data_write_req.setThreadContext(cid, curThread);
+ }
+
+ SimpleExecContext& t_info = *threadInfo[curThread];
+ SimpleThread* thread = t_info.thread;
+
Tick latency = 0;
for (int i = 0; i < width || locked; ++i) {
numCycles++;
+ ppCycles->notify(1);
- if (!curStaticInst || !curStaticInst->isDelayedCommit())
+ if (!curStaticInst || !curStaticInst->isDelayedCommit()) {
checkForInterrupts();
+ checkPcEventQueue();
+ }
- checkPcEventQueue();
// We must have just got suspended by a PC event
- if (_status == Idle)
+ if (_status == Idle) {
+ tryCompleteDrain();
return;
+ }
Fault fault = NoFault;
bool needToFetch = !isRomMicroPC(pcState.microPC()) &&
!curMacroStaticInst;
if (needToFetch) {
+ ifetch_req.taskId(taskId());
setupFetchRequest(&ifetch_req);
- fault = thread->itb->translateAtomic(&ifetch_req, tc,
+ fault = thread->itb->translateAtomic(&ifetch_req, thread->getTC(),
BaseTLB::Execute);
}
// like the I cache. It should be flushed, and when that works
// this code should be uncommented.
//Fetch more instruction memory if necessary
- //if(decoder.needMoreBytes())
+ //if (decoder.needMoreBytes())
//{
icache_access = true;
Packet ifetch_pkt = Packet(&ifetch_req, MemCmd::ReadReq);
preExecute();
if (curStaticInst) {
- fault = curStaticInst->execute(this, traceData);
+ fault = curStaticInst->execute(&t_info, traceData);
// keep an instruction count
- if (fault == NoFault)
+ if (fault == NoFault) {
countInst();
+ ppCommit->notify(std::make_pair(thread, curStaticInst));
+ }
else if (traceData && !DTRACE(ExecFaulting)) {
delete traceData;
traceData = NULL;
}
}
- if(fault != NoFault || !stayAtPC)
+ if (fault != NoFault || !t_info.stayAtPC)
advancePC(fault);
}
+ if (tryCompleteDrain())
+ return;
+
// instruction takes at least one cycle
if (latency < clockPeriod())
latency = clockPeriod();
if (_status != Idle)
- schedule(tickEvent, curTick() + latency);
+ reschedule(tickEvent, curTick() + latency, true);
}
+void
+AtomicSimpleCPU::regProbePoints()
+{
+ BaseCPU::regProbePoints();
+
+ ppCommit = new ProbePointArg<pair<SimpleThread*, const StaticInstPtr>>
+ (getProbeManager(), "Commit");
+}
void
AtomicSimpleCPU::printAddr(Addr a)
dcachePort.printAddr(a);
}
-
////////////////////////////////////////////////////////////////////////
//
// AtomicSimpleCPU Simulation Object
AtomicSimpleCPU *
AtomicSimpleCPUParams::create()
{
- numThreads = 1;
- if (!FullSystem && workload.size() != 1)
- panic("only one workload allowed");
return new AtomicSimpleCPU(this);
}