#include "arch/mmaped_ipr.hh"
#include "arch/utility.hh"
#include "base/bigint.hh"
+#include "config/the_isa.hh"
#include "cpu/exetrace.hh"
#include "cpu/simple/atomic.hh"
#include "mem/packet.hh"
using namespace std;
using namespace TheISA;
AtomicSimpleCPU::TickEvent::TickEvent(AtomicSimpleCPU *c)
- : Event(&mainEventQueue, CPU_Tick_Pri), cpu(c)
+ : Event(CPU_Tick_Pri), cpu(c)
{
}
}
const char *
-AtomicSimpleCPU::TickEvent::description()
+AtomicSimpleCPU::TickEvent::description() const
{
return "AtomicSimpleCPU tick";
}
Port *
-AtomicSimpleCPU::getPort(const std::string &if_name, int idx)
+AtomicSimpleCPU::getPort(const string &if_name, int idx)
{
if (if_name == "dcache_port")
return &dcachePort;
{
BaseCPU::init();
#if FULL_SYSTEM
- for (int i = 0; i < threadContexts.size(); ++i) {
+ ThreadID size = threadContexts.size();
+ for (ThreadID i = 0; i < size; ++i) {
ThreadContext *tc = threadContexts[i];
// initialize CPU, including PC
- TheISA::initCPU(tc, tc->readCpuId());
+ TheISA::initCPU(tc, tc->contextId());
}
#endif
if (hasPhysMemPort) {
physmemPort.getPeerAddressRanges(pmAddrList, snoop);
physMemAddr = *pmAddrList.begin();
}
+ // Atomic doesn't do MT right now, so contextId == threadId
+ ifetch_req.setThreadContext(_cpuId, 0); // Add thread ID if we add MT
+ data_read_req.setThreadContext(_cpuId, 0); // Add thread ID here too
+ data_write_req.setThreadContext(_cpuId, 0); // Add thread ID here too
}
bool
#if FULL_SYSTEM
// Update the ThreadContext's memory ports (Functional/Virtual
// Ports)
- cpu->tcBase()->connectMemPorts();
+ cpu->tcBase()->connectMemPorts(cpu->tcBase());
#endif
}
-AtomicSimpleCPU::AtomicSimpleCPU(Params *p)
- : BaseSimpleCPU(p), tickEvent(this),
- width(p->width), simulate_stalls(p->simulate_stalls),
+AtomicSimpleCPU::AtomicSimpleCPU(AtomicSimpleCPUParams *p)
+ : BaseSimpleCPU(p), tickEvent(this), width(p->width), locked(false),
+ simulate_data_stalls(p->simulate_data_stalls),
+ simulate_inst_stalls(p->simulate_inst_stalls),
      icachePort(name() + "-iport", this), dcachePort(name() + "-dport", this),
      physmemPort(name() + "-pport", this), hasPhysMemPort(false)
{
icachePort.snoopRangeSent = false;
dcachePort.snoopRangeSent = false;
- ifetch_req.setThreadContext(p->cpu_id, 0); // Add thread ID if we add MT
- data_read_req.setThreadContext(p->cpu_id, 0); // Add thread ID here too
- data_write_req.setThreadContext(p->cpu_id, 0); // Add thread ID here too
}
AtomicSimpleCPU::~AtomicSimpleCPU()
{
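+    // make sure a still-scheduled tick event doesn't outlive this CPU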
+ if (tickEvent.scheduled()) {
+ deschedule(tickEvent);
+ }
}
void
{
SimObject::State so_state = SimObject::getState();
SERIALIZE_ENUM(so_state);
- Status _status = status();
- SERIALIZE_ENUM(_status);
+ SERIALIZE_SCALAR(locked);
BaseSimpleCPU::serialize(os);
nameOut(os, csprintf("%s.tickEvent", name()));
tickEvent.serialize(os);
{
SimObject::State so_state;
UNSERIALIZE_ENUM(so_state);
- UNSERIALIZE_ENUM(_status);
+ UNSERIALIZE_SCALAR(locked);
BaseSimpleCPU::unserialize(cp, section);
tickEvent.unserialize(cp, csprintf("%s.tickEvent", section));
}
void
AtomicSimpleCPU::resume()
{
+ if (_status == Idle || _status == SwitchedOut)
+ return;
+
DPRINTF(SimpleCPU, "Resume\n");
- if (_status != SwitchedOut && _status != Idle) {
- assert(system->getMemoryMode() == Enums::atomic);
+ assert(system->getMemoryMode() == Enums::atomic);
- changeState(SimObject::Running);
- if (thread->status() == ThreadContext::Active) {
- if (!tickEvent.scheduled()) {
- tickEvent.schedule(nextCycle());
- }
- }
+ changeState(SimObject::Running);
+ if (thread->status() == ThreadContext::Active) {
+ if (!tickEvent.scheduled())
+ schedule(tickEvent, nextCycle());
}
}
void
AtomicSimpleCPU::switchOut()
{
- assert(status() == Running || status() == Idle);
+ assert(_status == Running || _status == Idle);
_status = SwitchedOut;
tickEvent.squash();
// if any of this CPU's ThreadContexts are active, mark the CPU as
// running and schedule its tick event.
- for (int i = 0; i < threadContexts.size(); ++i) {
+ ThreadID size = threadContexts.size();
+ for (ThreadID i = 0; i < size; ++i) {
ThreadContext *tc = threadContexts[i];
if (tc->status() == ThreadContext::Active && _status != Running) {
_status = Running;
- tickEvent.schedule(nextCycle());
+ schedule(tickEvent, nextCycle());
break;
}
}
if (_status != Running) {
_status = Idle;
}
+ assert(threadContexts.size() == 1);
+ ifetch_req.setThreadContext(_cpuId, 0); // Add thread ID if we add MT
+ data_read_req.setThreadContext(_cpuId, 0); // Add thread ID here too
+ data_write_req.setThreadContext(_cpuId, 0); // Add thread ID here too
}
assert(!tickEvent.scheduled());
notIdleFraction++;
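+    // count the cycles that elapsed while the thread was suspended so
+    // numCycles stays in step with total simulated cycles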
+ numCycles += tickToCycles(thread->lastActivate - thread->lastSuspend);
//Make sure ticks are still on multiples of cycles
- tickEvent.schedule(nextCycle(curTick + cycles(delay)));
+ schedule(tickEvent, nextCycle(curTick + ticks(delay)));
_status = Running;
}
assert(thread_num == 0);
assert(thread);
+ if (_status == Idle)
+ return;
+
assert(_status == Running);
// tick event may not be scheduled if this gets called from inside
// an instruction's execution, e.g. "quiesce"
if (tickEvent.scheduled())
- tickEvent.deschedule();
+ deschedule(tickEvent);
notIdleFraction--;
_status = Idle;
{
// use the CPU's statically allocated read request and packet objects
Request *req = &data_read_req;
- req->setVirt(0, addr, sizeof(T), flags, thread->readPC());
if (traceData) {
traceData->setAddr(addr);
}
- // translate to physical address
- Fault fault = thread->translateDataReadReq(req);
-
- // Now do the access.
- if (fault == NoFault) {
- Packet pkt =
- Packet(req,
- req->isLocked() ? MemCmd::LoadLockedReq : MemCmd::ReadReq,
- Packet::Broadcast);
- pkt.dataStatic(&data);
-
- if (req->isMmapedIpr())
- dcache_latency = TheISA::handleIprRead(thread->getTC(), &pkt);
- else {
- if (hasPhysMemPort && pkt.getAddr() == physMemAddr)
- dcache_latency = physmemPort.sendAtomic(&pkt);
- else
- dcache_latency = dcachePort.sendAtomic(&pkt);
+ //The block size of our peer.
+ unsigned blockSize = dcachePort.peerBlockSize();
+ //The size of the data we're trying to read.
+ int dataSize = sizeof(T);
+
+    uint8_t *dataPtr = (uint8_t *)&data;
+
+ //The address of the second part of this access if it needs to be split
+ //across a cache line boundary.
+ Addr secondAddr = roundDown(addr + dataSize - 1, blockSize);
+
+    if (secondAddr > addr)
+        dataSize = secondAddr - addr;
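+    // Illustrative example: with 64-byte blocks, a 4-byte read at 0x3fe
+    // yields secondAddr == 0x400, so the first pass of the loop below
+    // covers the two bytes at 0x3fe-0x3ff and the second pass 0x400-0x401.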
+
+ dcache_latency = 0;
+
+    while (1) {
+ req->setVirt(0, addr, dataSize, flags, thread->readPC());
+
+ // translate to physical address
+ Fault fault = thread->dtb->translateAtomic(req, tc, BaseTLB::Read);
+
+ // Now do the access.
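+        // (translation can flag a request NO_ACCESS, meaning it should
+        // not actually touch memory)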
+ if (fault == NoFault && !req->getFlags().isSet(Request::NO_ACCESS)) {
+ Packet pkt = Packet(req,
+ req->isLLSC() ? MemCmd::LoadLockedReq : MemCmd::ReadReq,
+ Packet::Broadcast);
+ pkt.dataStatic(dataPtr);
+
+ if (req->isMmapedIpr())
+ dcache_latency += TheISA::handleIprRead(thread->getTC(), &pkt);
+ else {
+ if (hasPhysMemPort && pkt.getAddr() == physMemAddr)
+ dcache_latency += physmemPort.sendAtomic(&pkt);
+ else
+ dcache_latency += dcachePort.sendAtomic(&pkt);
+ }
+ dcache_access = true;
+
+ assert(!pkt.isError());
+
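+            // load-locked: record the reservation so a later
+            // store-conditional can be checked against it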
+ if (req->isLLSC()) {
+ TheISA::handleLockedRead(thread, req);
+ }
}
- dcache_access = true;
- assert(!pkt.isError());
- data = gtoh(data);
+        // This will need a new way to tell if it has a dcache attached.
+        if (req->isUncacheable())
+            recordEvent("Uncached Read");
- if (req->isLocked()) {
- TheISA::handleLockedRead(thread, req);
+        //If there's a fault, return it
+        if (fault != NoFault) {
+            if (req->isPrefetch()) {
+                return NoFault;
+            } else {
+                return fault;
+            }
}
- }
- // This will need a new way to tell if it has a dcache attached.
- if (req->isUncacheable())
- recordEvent("Uncached Read");
+        //If we don't need to access a second cache line, stop now.
+        if (secondAddr <= addr)
+        {
+            data = gtoh(data);
+            if (traceData) {
+                traceData->setData(data);
+            }
+            if (req->isLocked() && fault == NoFault) {
+                assert(!locked);
+                locked = true;
+            }
+            return fault;
+        }
- return fault;
+        /*
+         * Set up for accessing the second cache line.
+         */
+
+        //Advance the pointer we're reading into past the bytes already read.
+        dataPtr += dataSize;
+        //Adjust the size to get the remaining bytes.
+        dataSize = addr + sizeof(T) - secondAddr;
+        //And access the right address.
+        addr = secondAddr;
+ }
}
#ifndef DOXYGEN_SHOULD_SKIP_THIS
{
// use the CPU's statically allocated write request and packet objects
Request *req = &data_write_req;
- req->setVirt(0, addr, sizeof(T), flags, thread->readPC());
if (traceData) {
traceData->setAddr(addr);
}
- // translate to physical address
- Fault fault = thread->translateDataWriteReq(req);
-
- // Now do the access.
- if (fault == NoFault) {
- MemCmd cmd = MemCmd::WriteReq; // default
- bool do_access = true; // flag to suppress cache access
-
- if (req->isLocked()) {
- cmd = MemCmd::StoreCondReq;
- do_access = TheISA::handleLockedWrite(thread, req);
- } else if (req->isSwap()) {
- cmd = MemCmd::SwapReq;
- if (req->isCondSwap()) {
- assert(res);
- req->setExtraData(*res);
- }
- }
+ //The block size of our peer.
+ unsigned blockSize = dcachePort.peerBlockSize();
+    //The size of the data we're trying to write.
+ int dataSize = sizeof(T);
- if (do_access) {
- Packet pkt = Packet(req, cmd, Packet::Broadcast);
- pkt.dataStatic(&data);
+    uint8_t *dataPtr = (uint8_t *)&data;
- if (req->isMmapedIpr()) {
- dcache_latency = TheISA::handleIprWrite(thread->getTC(), &pkt);
- } else {
- data = htog(data);
- if (hasPhysMemPort && pkt.getAddr() == physMemAddr)
- dcache_latency = physmemPort.sendAtomic(&pkt);
- else
- dcache_latency = dcachePort.sendAtomic(&pkt);
+ //The address of the second part of this access if it needs to be split
+ //across a cache line boundary.
+ Addr secondAddr = roundDown(addr + dataSize - 1, blockSize);
+
+    if (secondAddr > addr)
+        dataSize = secondAddr - addr;
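+    // same cache-line-crossing split as in read() above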
+
+ dcache_latency = 0;
+
+    while (1) {
+ req->setVirt(0, addr, dataSize, flags, thread->readPC());
+
+ // translate to physical address
+ Fault fault = thread->dtb->translateAtomic(req, tc, BaseTLB::Write);
+
+ // Now do the access.
+ if (fault == NoFault) {
+ MemCmd cmd = MemCmd::WriteReq; // default
+ bool do_access = true; // flag to suppress cache access
+
+ if (req->isLLSC()) {
+ cmd = MemCmd::StoreCondReq;
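+                // handleLockedWrite returns false when the reservation is
+                // gone; the access is then suppressed and the failure is
+                // reported through the request's extra data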
+ do_access = TheISA::handleLockedWrite(thread, req);
+ } else if (req->isSwap()) {
+ cmd = MemCmd::SwapReq;
+ if (req->isCondSwap()) {
+ assert(res);
+ req->setExtraData(*res);
+ }
+ }
+
+ if (do_access && !req->getFlags().isSet(Request::NO_ACCESS)) {
+ Packet pkt = Packet(req, cmd, Packet::Broadcast);
+ pkt.dataStatic(dataPtr);
+
+ if (req->isMmapedIpr()) {
+ dcache_latency +=
+ TheISA::handleIprWrite(thread->getTC(), &pkt);
+ } else {
+                //XXX The byte swap needs to happen outside of this loop
+                //for cache-line-crossing accesses in cross-endian
+                //simulations to work properly.
+ data = htog(data);
+ if (hasPhysMemPort && pkt.getAddr() == physMemAddr)
+ dcache_latency += physmemPort.sendAtomic(&pkt);
+ else
+ dcache_latency += dcachePort.sendAtomic(&pkt);
+ }
+ dcache_access = true;
+ assert(!pkt.isError());
+
+ if (req->isSwap()) {
+ assert(res);
+ *res = pkt.get<T>();
+ }
}
- dcache_access = true;
- assert(!pkt.isError());
- if (req->isSwap()) {
- assert(res);
- *res = pkt.get<T>();
+ if (res && !req->isSwap()) {
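+                // a store-conditional's success flag comes back via the
+                // request's extra data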
+ *res = req->getExtraData();
}
}
- if (res && !req->isSwap()) {
- *res = req->getExtraData();
+        // This will need a new way to tell if it's hooked up to a cache or not.
+        if (req->isUncacheable())
+            recordEvent("Uncached Write");
+
+        //If there's a fault or we don't need to access a second cache line,
+        //stop now.
+        if (fault != NoFault || secondAddr <= addr)
+        {
+            // If the write needs to have a fault on the access, consider
+            // calling changeStatus() and changing it to "bad addr write"
+            // or something.
+            if (traceData) {
+                traceData->setData(gtoh(data));
+            }
+            if (req->isLocked() && fault == NoFault) {
+                assert(locked);
+                locked = false;
+            }
+            if (fault != NoFault && req->isPrefetch()) {
+                return NoFault;
+            } else {
+                return fault;
+            }
}
- }
- // This will need a new way to tell if it's hooked up to a cache or not.
- if (req->isUncacheable())
- recordEvent("Uncached Write");
+        /*
+         * Set up for accessing the second cache line.
+         */
- // If the write needs to have a fault on the access, consider calling
- // changeStatus() and changing it to "bad addr write" or something.
- return fault;
+        //Advance the pointer we're writing from past the bytes already written.
+        dataPtr += dataSize;
+        //Adjust the size to get the remaining bytes.
+        dataSize = addr + sizeof(T) - secondAddr;
+        //And access the right address.
+        addr = secondAddr;
+ }
}
{
DPRINTF(SimpleCPU, "Tick\n");
- Tick latency = cycles(1); // instruction takes one cycle by default
+ Tick latency = 0;
- for (int i = 0; i < width; ++i) {
+ for (int i = 0; i < width || locked; ++i) {
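+        // 'locked' keeps the loop going past 'width' so a locked
+        // load/store pair isn't split across ticks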
numCycles++;
if (!curStaticInst || !curStaticInst->isDelayedCommit())
checkForInterrupts();
- Fault fault = setupFetchRequest(&ifetch_req);
+ checkPcEventQueue();
+
+ Fault fault = NoFault;
+
+ bool fromRom = isRomMicroPC(thread->readMicroPC());
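+        // microcode fetched from the ROM and the interior of a macro-op
+        // don't come from memory, so skip translation and fetch for them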
+ if (!fromRom && !curMacroStaticInst) {
+ setupFetchRequest(&ifetch_req);
+ fault = thread->itb->translateAtomic(&ifetch_req, tc,
+ BaseTLB::Execute);
+ }
if (fault == NoFault) {
Tick icache_latency = 0;
bool icache_access = false;
dcache_access = false; // assume no dcache access
- //Fetch more instruction memory if necessary
- //if(predecoder.needMoreBytes())
- //{
- icache_access = true;
- Packet ifetch_pkt = Packet(&ifetch_req, MemCmd::ReadReq,
- Packet::Broadcast);
- ifetch_pkt.dataStatic(&inst);
-
- if (hasPhysMemPort && ifetch_pkt.getAddr() == physMemAddr)
- icache_latency = physmemPort.sendAtomic(&ifetch_pkt);
- else
- icache_latency = icachePort.sendAtomic(&ifetch_pkt);
-
-
- // ifetch_req is initialized to read the instruction directly
- // into the CPU object's inst field.
- //}
+ if (!fromRom && !curMacroStaticInst) {
+ // This is commented out because the predecoder would act like
+ // a tiny cache otherwise. It wouldn't be flushed when needed
+ // like the I cache. It should be flushed, and when that works
+ // this code should be uncommented.
+ //Fetch more instruction memory if necessary
+ //if(predecoder.needMoreBytes())
+ //{
+ icache_access = true;
+ Packet ifetch_pkt = Packet(&ifetch_req, MemCmd::ReadReq,
+ Packet::Broadcast);
+ ifetch_pkt.dataStatic(&inst);
+
+ if (hasPhysMemPort && ifetch_pkt.getAddr() == physMemAddr)
+ icache_latency = physmemPort.sendAtomic(&ifetch_pkt);
+ else
+ icache_latency = icachePort.sendAtomic(&ifetch_pkt);
+
+ assert(!ifetch_pkt.isError());
+
+ // ifetch_req is initialized to read the instruction directly
+ // into the CPU object's inst field.
+ //}
+ }
preExecute();
- if(curStaticInst)
- {
+ if (curStaticInst) {
fault = curStaticInst->execute(this, traceData);
+
+ // keep an instruction count
+ if (fault == NoFault)
+ countInst();
+ else if (traceData) {
+ // If there was a fault, we should trace this instruction.
+ delete traceData;
+ traceData = NULL;
+ }
+
postExecute();
}
curStaticInst->isFirstMicroop()))
instCnt++;
- if (simulate_stalls) {
- Tick icache_stall =
- icache_access ? icache_latency - cycles(1) : 0;
- Tick dcache_stall =
- dcache_access ? dcache_latency - cycles(1) : 0;
- Tick stall_cycles = (icache_stall + dcache_stall) / cycles(1);
- if (cycles(stall_cycles) < (icache_stall + dcache_stall))
- latency += cycles(stall_cycles+1);
- else
- latency += cycles(stall_cycles);
+ Tick stall_ticks = 0;
+ if (simulate_inst_stalls && icache_access)
+ stall_ticks += icache_latency;
+
+ if (simulate_data_stalls && dcache_access)
+ stall_ticks += dcache_latency;
+
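+        // charge stalls rounded up to whole CPU cycles; illustratively,
+        // 1200 ticks of stall with a 500-tick clock become 1500 ticks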
+ if (stall_ticks) {
+ Tick stall_cycles = stall_ticks / ticks(1);
+ Tick aligned_stall_ticks = ticks(stall_cycles);
+
+            if (aligned_stall_ticks < stall_ticks)
+                aligned_stall_ticks += ticks(1);
+
+ latency += aligned_stall_ticks;
}
}
advancePC(fault);
}
+ // instruction takes at least one cycle
+ if (latency < ticks(1))
+ latency = ticks(1);
+
if (_status != Idle)
- tickEvent.schedule(curTick + latency);
+ schedule(tickEvent, curTick + latency);
+}
+
+
+void
+AtomicSimpleCPU::printAddr(Addr a)
+{
+ dcachePort.printAddr(a);
}
AtomicSimpleCPU *
AtomicSimpleCPUParams::create()
{
- AtomicSimpleCPU::Params *params = new AtomicSimpleCPU::Params();
- params->name = name;
- params->numberOfThreads = 1;
- params->max_insts_any_thread = max_insts_any_thread;
- params->max_insts_all_threads = max_insts_all_threads;
- params->max_loads_any_thread = max_loads_any_thread;
- params->max_loads_all_threads = max_loads_all_threads;
- params->progress_interval = progress_interval;
- params->deferRegistration = defer_registration;
- params->phase = phase;
- params->clock = clock;
- params->functionTrace = function_trace;
- params->functionTraceStart = function_trace_start;
- params->width = width;
- params->simulate_stalls = simulate_stalls;
- params->system = system;
- params->cpu_id = cpu_id;
- params->tracer = tracer;
-
-#if FULL_SYSTEM
- params->itb = itb;
- params->dtb = dtb;
- params->profile = profile;
- params->do_quiesce = do_quiesce;
- params->do_checkpoint_insts = do_checkpoint_insts;
- params->do_statistics_insts = do_statistics_insts;
-#else
+ numThreads = 1;
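+    // (the generated AtomicSimpleCPUParams object is itself the parameter
+    // struct now, so it can be handed straight to the constructor below)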
+#if !FULL_SYSTEM
if (workload.size() != 1)
panic("only one workload allowed");
- params->process = workload[0];
#endif
-
- AtomicSimpleCPU *cpu = new AtomicSimpleCPU(params);
- return cpu;
+ return new AtomicSimpleCPU(this);
}