/*
 * Copyright (c) 2002-2005 The Regents of The University of Michigan
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Steve Reinhardt
 */
#include <cstring>

#include "arch/locked_mem.hh"
#include "arch/mmaped_ipr.hh"
#include "arch/utility.hh"
#include "base/bigint.hh"
#include "config/the_isa.hh"
#include "cpu/exetrace.hh"
#include "cpu/simple/atomic.hh"
#include "mem/packet.hh"
#include "mem/packet_access.hh"
#include "params/AtomicSimpleCPU.hh"
#include "sim/system.hh"
44 using namespace TheISA
;
46 AtomicSimpleCPU::TickEvent::TickEvent(AtomicSimpleCPU
*c
)
47 : Event(CPU_Tick_Pri
), cpu(c
)
53 AtomicSimpleCPU::TickEvent::process()
59 AtomicSimpleCPU::TickEvent::description() const
61 return "AtomicSimpleCPU tick";
65 AtomicSimpleCPU::getPort(const string
&if_name
, int idx
)
67 if (if_name
== "dcache_port")
69 else if (if_name
== "icache_port")
71 else if (if_name
== "physmem_port") {
72 hasPhysMemPort
= true;
76 panic("No Such Port\n");
80 AtomicSimpleCPU::init()
84 ThreadID size
= threadContexts
.size();
85 for (ThreadID i
= 0; i
< size
; ++i
) {
86 ThreadContext
*tc
= threadContexts
[i
];
88 // initialize CPU, including PC
89 TheISA::initCPU(tc
, tc
->contextId());
94 AddrRangeList pmAddrList
;
95 physmemPort
.getPeerAddressRanges(pmAddrList
, snoop
);
96 physMemAddr
= *pmAddrList
.begin();
98 // Atomic doesn't do MT right now, so contextId == threadId
99 ifetch_req
.setThreadContext(_cpuId
, 0); // Add thread ID if we add MT
100 data_read_req
.setThreadContext(_cpuId
, 0); // Add thread ID here too
101 data_write_req
.setThreadContext(_cpuId
, 0); // Add thread ID here too
105 AtomicSimpleCPU::CpuPort::recvTiming(PacketPtr pkt
)
107 panic("AtomicSimpleCPU doesn't expect recvTiming callback!");
112 AtomicSimpleCPU::CpuPort::recvAtomic(PacketPtr pkt
)
114 //Snooping a coherence request, just return
119 AtomicSimpleCPU::CpuPort::recvFunctional(PacketPtr pkt
)
121 //No internal storage to update, just return
126 AtomicSimpleCPU::CpuPort::recvStatusChange(Status status
)
128 if (status
== RangeChange
) {
129 if (!snoopRangeSent
) {
130 snoopRangeSent
= true;
131 sendStatusChange(Port::RangeChange
);
136 panic("AtomicSimpleCPU doesn't expect recvStatusChange callback!");
140 AtomicSimpleCPU::CpuPort::recvRetry()
142 panic("AtomicSimpleCPU doesn't expect recvRetry callback!");
146 AtomicSimpleCPU::DcachePort::setPeer(Port
*port
)
151 // Update the ThreadContext's memory ports (Functional/Virtual
153 cpu
->tcBase()->connectMemPorts(cpu
->tcBase());
157 AtomicSimpleCPU::AtomicSimpleCPU(AtomicSimpleCPUParams
*p
)
158 : BaseSimpleCPU(p
), tickEvent(this), width(p
->width
), locked(false),
159 simulate_data_stalls(p
->simulate_data_stalls
),
160 simulate_inst_stalls(p
->simulate_inst_stalls
),
161 icachePort(name() + "-iport", this), dcachePort(name() + "-iport", this),
162 physmemPort(name() + "-iport", this), hasPhysMemPort(false)
166 icachePort
.snoopRangeSent
= false;
167 dcachePort
.snoopRangeSent
= false;
172 AtomicSimpleCPU::~AtomicSimpleCPU()
174 if (tickEvent
.scheduled()) {
175 deschedule(tickEvent
);
180 AtomicSimpleCPU::serialize(ostream
&os
)
182 SimObject::State so_state
= SimObject::getState();
183 SERIALIZE_ENUM(so_state
);
184 SERIALIZE_SCALAR(locked
);
185 BaseSimpleCPU::serialize(os
);
186 nameOut(os
, csprintf("%s.tickEvent", name()));
187 tickEvent
.serialize(os
);
191 AtomicSimpleCPU::unserialize(Checkpoint
*cp
, const string
§ion
)
193 SimObject::State so_state
;
194 UNSERIALIZE_ENUM(so_state
);
195 UNSERIALIZE_SCALAR(locked
);
196 BaseSimpleCPU::unserialize(cp
, section
);
197 tickEvent
.unserialize(cp
, csprintf("%s.tickEvent", section
));
201 AtomicSimpleCPU::resume()
203 if (_status
== Idle
|| _status
== SwitchedOut
)
206 DPRINTF(SimpleCPU
, "Resume\n");
207 assert(system
->getMemoryMode() == Enums::atomic
);
209 changeState(SimObject::Running
);
210 if (thread
->status() == ThreadContext::Active
) {
211 if (!tickEvent
.scheduled())
212 schedule(tickEvent
, nextCycle());
217 AtomicSimpleCPU::switchOut()
219 assert(_status
== Running
|| _status
== Idle
);
220 _status
= SwitchedOut
;
227 AtomicSimpleCPU::takeOverFrom(BaseCPU
*oldCPU
)
229 BaseCPU::takeOverFrom(oldCPU
, &icachePort
, &dcachePort
);
231 assert(!tickEvent
.scheduled());
233 // if any of this CPU's ThreadContexts are active, mark the CPU as
234 // running and schedule its tick event.
235 ThreadID size
= threadContexts
.size();
236 for (ThreadID i
= 0; i
< size
; ++i
) {
237 ThreadContext
*tc
= threadContexts
[i
];
238 if (tc
->status() == ThreadContext::Active
&& _status
!= Running
) {
240 schedule(tickEvent
, nextCycle());
244 if (_status
!= Running
) {
247 assert(threadContexts
.size() == 1);
248 ifetch_req
.setThreadContext(_cpuId
, 0); // Add thread ID if we add MT
249 data_read_req
.setThreadContext(_cpuId
, 0); // Add thread ID here too
250 data_write_req
.setThreadContext(_cpuId
, 0); // Add thread ID here too
255 AtomicSimpleCPU::activateContext(int thread_num
, int delay
)
257 DPRINTF(SimpleCPU
, "ActivateContext %d (%d cycles)\n", thread_num
, delay
);
259 assert(thread_num
== 0);
262 assert(_status
== Idle
);
263 assert(!tickEvent
.scheduled());
266 numCycles
+= tickToCycles(thread
->lastActivate
- thread
->lastSuspend
);
268 //Make sure ticks are still on multiples of cycles
269 schedule(tickEvent
, nextCycle(curTick
+ ticks(delay
)));
275 AtomicSimpleCPU::suspendContext(int thread_num
)
277 DPRINTF(SimpleCPU
, "SuspendContext %d\n", thread_num
);
279 assert(thread_num
== 0);
285 assert(_status
== Running
);
287 // tick event may not be scheduled if this gets called from inside
288 // an instruction's execution, e.g. "quiesce"
289 if (tickEvent
.scheduled())
290 deschedule(tickEvent
);
299 AtomicSimpleCPU::read(Addr addr
, T
&data
, unsigned flags
)
301 // use the CPU's statically allocated read request and packet objects
302 Request
*req
= &data_read_req
;
305 traceData
->setAddr(addr
);
308 //The block size of our peer.
309 unsigned blockSize
= dcachePort
.peerBlockSize();
310 //The size of the data we're trying to read.
311 int dataSize
= sizeof(T
);
313 uint8_t * dataPtr
= (uint8_t *)&data
;
315 //The address of the second part of this access if it needs to be split
316 //across a cache line boundary.
317 Addr secondAddr
= roundDown(addr
+ dataSize
- 1, blockSize
);
319 if(secondAddr
> addr
)
320 dataSize
= secondAddr
- addr
;
325 req
->setVirt(0, addr
, dataSize
, flags
, thread
->readPC());
327 // translate to physical address
328 Fault fault
= thread
->dtb
->translateAtomic(req
, tc
, BaseTLB::Read
);
330 // Now do the access.
331 if (fault
== NoFault
&& !req
->getFlags().isSet(Request::NO_ACCESS
)) {
332 Packet pkt
= Packet(req
,
333 req
->isLLSC() ? MemCmd::LoadLockedReq
: MemCmd::ReadReq
,
335 pkt
.dataStatic(dataPtr
);
337 if (req
->isMmapedIpr())
338 dcache_latency
+= TheISA::handleIprRead(thread
->getTC(), &pkt
);
340 if (hasPhysMemPort
&& pkt
.getAddr() == physMemAddr
)
341 dcache_latency
+= physmemPort
.sendAtomic(&pkt
);
343 dcache_latency
+= dcachePort
.sendAtomic(&pkt
);
345 dcache_access
= true;
347 assert(!pkt
.isError());
350 TheISA::handleLockedRead(thread
, req
);
354 // This will need a new way to tell if it has a dcache attached.
355 if (req
->isUncacheable())
356 recordEvent("Uncached Read");
358 //If there's a fault, return it
359 if (fault
!= NoFault
) {
360 if (req
->isPrefetch()) {
367 //If we don't need to access a second cache line, stop now.
368 if (secondAddr
<= addr
)
372 traceData
->setData(data
);
374 if (req
->isLocked() && fault
== NoFault
) {
382 * Set up for accessing the second cache line.
385 //Move the pointer we're reading into to the correct location.
387 //Adjust the size to get the remaining bytes.
388 dataSize
= addr
+ sizeof(T
) - secondAddr
;
389 //And access the right address.
394 #ifndef DOXYGEN_SHOULD_SKIP_THIS
398 AtomicSimpleCPU::read(Addr addr
, Twin32_t
&data
, unsigned flags
);
402 AtomicSimpleCPU::read(Addr addr
, Twin64_t
&data
, unsigned flags
);
406 AtomicSimpleCPU::read(Addr addr
, uint64_t &data
, unsigned flags
);
410 AtomicSimpleCPU::read(Addr addr
, uint32_t &data
, unsigned flags
);
414 AtomicSimpleCPU::read(Addr addr
, uint16_t &data
, unsigned flags
);
418 AtomicSimpleCPU::read(Addr addr
, uint8_t &data
, unsigned flags
);
420 #endif //DOXYGEN_SHOULD_SKIP_THIS
424 AtomicSimpleCPU::read(Addr addr
, double &data
, unsigned flags
)
426 return read(addr
, *(uint64_t*)&data
, flags
);
431 AtomicSimpleCPU::read(Addr addr
, float &data
, unsigned flags
)
433 return read(addr
, *(uint32_t*)&data
, flags
);
439 AtomicSimpleCPU::read(Addr addr
, int32_t &data
, unsigned flags
)
441 return read(addr
, (uint32_t&)data
, flags
);
447 AtomicSimpleCPU::write(T data
, Addr addr
, unsigned flags
, uint64_t *res
)
449 // use the CPU's statically allocated write request and packet objects
450 Request
*req
= &data_write_req
;
453 traceData
->setAddr(addr
);
456 //The block size of our peer.
457 unsigned blockSize
= dcachePort
.peerBlockSize();
458 //The size of the data we're trying to read.
459 int dataSize
= sizeof(T
);
461 uint8_t * dataPtr
= (uint8_t *)&data
;
463 //The address of the second part of this access if it needs to be split
464 //across a cache line boundary.
465 Addr secondAddr
= roundDown(addr
+ dataSize
- 1, blockSize
);
467 if(secondAddr
> addr
)
468 dataSize
= secondAddr
- addr
;
473 req
->setVirt(0, addr
, dataSize
, flags
, thread
->readPC());
475 // translate to physical address
476 Fault fault
= thread
->dtb
->translateAtomic(req
, tc
, BaseTLB::Write
);
478 // Now do the access.
479 if (fault
== NoFault
) {
480 MemCmd cmd
= MemCmd::WriteReq
; // default
481 bool do_access
= true; // flag to suppress cache access
484 cmd
= MemCmd::StoreCondReq
;
485 do_access
= TheISA::handleLockedWrite(thread
, req
);
486 } else if (req
->isSwap()) {
487 cmd
= MemCmd::SwapReq
;
488 if (req
->isCondSwap()) {
490 req
->setExtraData(*res
);
494 if (do_access
&& !req
->getFlags().isSet(Request::NO_ACCESS
)) {
495 Packet pkt
= Packet(req
, cmd
, Packet::Broadcast
);
496 pkt
.dataStatic(dataPtr
);
498 if (req
->isMmapedIpr()) {
500 TheISA::handleIprWrite(thread
->getTC(), &pkt
);
502 //XXX This needs to be outside of the loop in order to
503 //work properly for cache line boundary crossing
504 //accesses in transendian simulations.
506 if (hasPhysMemPort
&& pkt
.getAddr() == physMemAddr
)
507 dcache_latency
+= physmemPort
.sendAtomic(&pkt
);
509 dcache_latency
+= dcachePort
.sendAtomic(&pkt
);
511 dcache_access
= true;
512 assert(!pkt
.isError());
520 if (res
&& !req
->isSwap()) {
521 *res
= req
->getExtraData();
525 // This will need a new way to tell if it's hooked up to a cache or not.
526 if (req
->isUncacheable())
527 recordEvent("Uncached Write");
529 //If there's a fault or we don't need to access a second cache line,
531 if (fault
!= NoFault
|| secondAddr
<= addr
)
533 // If the write needs to have a fault on the access, consider
534 // calling changeStatus() and changing it to "bad addr write"
537 traceData
->setData(gtoh(data
));
539 if (req
->isLocked() && fault
== NoFault
) {
543 if (fault
!= NoFault
&& req
->isPrefetch()) {
551 * Set up for accessing the second cache line.
554 //Move the pointer we're reading into to the correct location.
556 //Adjust the size to get the remaining bytes.
557 dataSize
= addr
+ sizeof(T
) - secondAddr
;
558 //And access the right address.
564 #ifndef DOXYGEN_SHOULD_SKIP_THIS
568 AtomicSimpleCPU::write(Twin32_t data
, Addr addr
,
569 unsigned flags
, uint64_t *res
);
573 AtomicSimpleCPU::write(Twin64_t data
, Addr addr
,
574 unsigned flags
, uint64_t *res
);
578 AtomicSimpleCPU::write(uint64_t data
, Addr addr
,
579 unsigned flags
, uint64_t *res
);
583 AtomicSimpleCPU::write(uint32_t data
, Addr addr
,
584 unsigned flags
, uint64_t *res
);
588 AtomicSimpleCPU::write(uint16_t data
, Addr addr
,
589 unsigned flags
, uint64_t *res
);
593 AtomicSimpleCPU::write(uint8_t data
, Addr addr
,
594 unsigned flags
, uint64_t *res
);
596 #endif //DOXYGEN_SHOULD_SKIP_THIS
600 AtomicSimpleCPU::write(double data
, Addr addr
, unsigned flags
, uint64_t *res
)
602 return write(*(uint64_t*)&data
, addr
, flags
, res
);
607 AtomicSimpleCPU::write(float data
, Addr addr
, unsigned flags
, uint64_t *res
)
609 return write(*(uint32_t*)&data
, addr
, flags
, res
);
615 AtomicSimpleCPU::write(int32_t data
, Addr addr
, unsigned flags
, uint64_t *res
)
617 return write((uint32_t)data
, addr
, flags
, res
);
622 AtomicSimpleCPU::tick()
624 DPRINTF(SimpleCPU
, "Tick\n");
628 for (int i
= 0; i
< width
|| locked
; ++i
) {
631 if (!curStaticInst
|| !curStaticInst
->isDelayedCommit())
632 checkForInterrupts();
636 Fault fault
= NoFault
;
638 bool fromRom
= isRomMicroPC(thread
->readMicroPC());
639 if (!fromRom
&& !curMacroStaticInst
) {
640 setupFetchRequest(&ifetch_req
);
641 fault
= thread
->itb
->translateAtomic(&ifetch_req
, tc
,
645 if (fault
== NoFault
) {
646 Tick icache_latency
= 0;
647 bool icache_access
= false;
648 dcache_access
= false; // assume no dcache access
650 if (!fromRom
&& !curMacroStaticInst
) {
651 // This is commented out because the predecoder would act like
652 // a tiny cache otherwise. It wouldn't be flushed when needed
653 // like the I cache. It should be flushed, and when that works
654 // this code should be uncommented.
655 //Fetch more instruction memory if necessary
656 //if(predecoder.needMoreBytes())
658 icache_access
= true;
659 Packet ifetch_pkt
= Packet(&ifetch_req
, MemCmd::ReadReq
,
661 ifetch_pkt
.dataStatic(&inst
);
663 if (hasPhysMemPort
&& ifetch_pkt
.getAddr() == physMemAddr
)
664 icache_latency
= physmemPort
.sendAtomic(&ifetch_pkt
);
666 icache_latency
= icachePort
.sendAtomic(&ifetch_pkt
);
668 assert(!ifetch_pkt
.isError());
670 // ifetch_req is initialized to read the instruction directly
671 // into the CPU object's inst field.
678 fault
= curStaticInst
->execute(this, traceData
);
680 // keep an instruction count
681 if (fault
== NoFault
)
683 else if (traceData
) {
684 // If there was a fault, we should trace this instruction.
692 // @todo remove me after debugging with legion done
693 if (curStaticInst
&& (!curStaticInst
->isMicroop() ||
694 curStaticInst
->isFirstMicroop()))
697 Tick stall_ticks
= 0;
698 if (simulate_inst_stalls
&& icache_access
)
699 stall_ticks
+= icache_latency
;
701 if (simulate_data_stalls
&& dcache_access
)
702 stall_ticks
+= dcache_latency
;
705 Tick stall_cycles
= stall_ticks
/ ticks(1);
706 Tick aligned_stall_ticks
= ticks(stall_cycles
);
708 if (aligned_stall_ticks
< stall_ticks
)
709 aligned_stall_ticks
+= 1;
711 latency
+= aligned_stall_ticks
;
715 if(fault
!= NoFault
|| !stayAtPC
)
719 // instruction takes at least one cycle
720 if (latency
< ticks(1))
724 schedule(tickEvent
, curTick
+ latency
);
729 AtomicSimpleCPU::printAddr(Addr a
)
731 dcachePort
.printAddr(a
);
////////////////////////////////////////////////////////////////////////
//
//  AtomicSimpleCPU Simulation Object
//
////////////////////////////////////////////////////////////////////////
740 AtomicSimpleCPUParams::create()
744 if (workload
.size() != 1)
745 panic("only one workload allowed");
747 return new AtomicSimpleCPU(this);