2 * Copyright (c) 2002-2005 The Regents of The University of Michigan
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 * Authors: Steve Reinhardt
31 #include "arch/locked_mem.hh"
32 #include "arch/mmapped_ipr.hh"
33 #include "arch/utility.hh"
34 #include "base/bigint.hh"
35 #include "config/the_isa.hh"
36 #include "cpu/simple/atomic.hh"
37 #include "cpu/exetrace.hh"
38 #include "debug/ExecFaulting.hh"
39 #include "debug/SimpleCPU.hh"
40 #include "mem/packet.hh"
41 #include "mem/packet_access.hh"
42 #include "params/AtomicSimpleCPU.hh"
43 #include "sim/faults.hh"
44 #include "sim/system.hh"
45 #include "sim/full_system.hh"
// Pull TheISA helpers (initCPU, handleLockedRead/Write, handleIprRead/Write,
// isRomMicroPC, ...) into scope for the definitions below.
48 using namespace TheISA
;
// TickEvent constructor: run at CPU-tick priority and remember the owning
// CPU so process() can drive it each scheduled tick.
50 AtomicSimpleCPU::TickEvent::TickEvent(AtomicSimpleCPU
*c
)
51 : Event(CPU_Tick_Pri
), cpu(c
)
// Event callback invoked on every scheduled CPU tick.
// NOTE(review): the body is not visible in this excerpt; presumably it
// forwards to cpu->tick() -- confirm against the full source.
57 AtomicSimpleCPU::TickEvent::process()
// Human-readable event description used by event-queue debug output.
63 AtomicSimpleCPU::TickEvent::description() const
65 return "AtomicSimpleCPU tick";
// Look up one of this CPU's ports by name (used when wiring up the memory
// system). 'idx' is ignored. Unknown names are a fatal configuration error.
// NOTE(review): the per-branch return statements are not visible in this
// excerpt.
69 AtomicSimpleCPU::getPort(const string
&if_name
, int idx
)
// Data-side cache port.
71 if (if_name
== "dcache_port")
// Instruction-fetch port.
73 else if (if_name
== "icache_port")
// Optional fast path straight to physical memory.
75 else if (if_name
== "physmem_port") {
// Remember that a direct physical-memory port is connected, so init()
// can cache its address range and the access paths can bypass the caches.
76 hasPhysMemPort
= true;
// Any unrecognized port name is fatal.
80 panic("No Such Port\n");
// Initialize the CPU: run per-thread ISA initialization, set up the thread
// context's memory proxies, cache the physical-memory port's address range
// (when one is connected), and point the statically allocated requests at
// this CPU's context id.
84 AtomicSimpleCPU::init()
88 ThreadID size
= threadContexts
.size();
89 for (ThreadID i
= 0; i
< size
; ++i
) {
90 ThreadContext
*tc
= threadContexts
[i
];
91 // initialize CPU, including PC
92 TheISA::initCPU(tc
, tc
->contextId());
96 // Initialise the ThreadContext's memory proxies
97 tcBase()->initMemProxies(tcBase());
// Cache the address range served by the physmem port so accesses can be
// routed straight to it. NOTE(review): only the first range is kept, and
// the enclosing hasPhysMemPort guard is not visible in this excerpt.
100 AddrRangeList pmAddrList
= physmemPort
.getPeer()->getAddrRanges();
101 physMemAddr
= *pmAddrList
.begin();
103 // Atomic doesn't do MT right now, so contextId == threadId
104 ifetch_req
.setThreadContext(_cpuId
, 0); // Add thread ID if we add MT
105 data_read_req
.setThreadContext(_cpuId
, 0); // Add thread ID here too
106 data_write_req
.setThreadContext(_cpuId
, 0); // Add thread ID here too
// Construct the atomic CPU: forward parameters to BaseSimpleCPU and set up
// the tick event, per-tick issue width, LL/SC lock flag, stall-modeling
// knobs, and the three memory ports (physmem fast path initially absent).
109 AtomicSimpleCPU::AtomicSimpleCPU(AtomicSimpleCPUParams
*p
)
110 : BaseSimpleCPU(p
), tickEvent(this), width(p
->width
), locked(false),
111 simulate_data_stalls(p
->simulate_data_stalls
),
112 simulate_inst_stalls(p
->simulate_inst_stalls
),
// Fix copy-paste defect: all three ports were named "<name>-iport"; give
// the data and physmem ports distinct suffixes so debug/trace output can
// tell them apart. (Port names are descriptive only; getPort() matches on
// the separate "dcache_port"/"icache_port"/"physmem_port" strings.)
113 icachePort(name() + "-iport", this), dcachePort(name() + "-dport", this),
114 physmemPort(name() + "-pport", this), hasPhysMemPort(false)
// Destructor: make sure a pending tick event is removed from the event
// queue before this object goes away.
120 AtomicSimpleCPU::~AtomicSimpleCPU()
122 if (tickEvent
.scheduled()) {
123 deschedule(tickEvent
);
// Checkpoint this CPU: SimObject drain state, the LL/SC 'locked' flag,
// base-class state, and the tick event (under its own named section).
128 AtomicSimpleCPU::serialize(ostream
&os
)
130 SimObject::State so_state
= SimObject::getState();
131 SERIALIZE_ENUM(so_state
);
// 'locked' tracks an in-flight load-locked/store-conditional sequence.
132 SERIALIZE_SCALAR(locked
);
133 BaseSimpleCPU::serialize(os
);
// The tick event serializes into its own "<name>.tickEvent" section.
134 nameOut(os
, csprintf("%s.tickEvent", name()));
135 tickEvent
.serialize(os
);
// Restore CPU state from a checkpoint; mirror image of serialize():
// SimObject state, the LL/SC 'locked' flag, base-class state, and the
// tick event from its own "<section>.tickEvent" section.
139 AtomicSimpleCPU::unserialize(Checkpoint
*cp
, const string
// Fix mis-encoded parameter: "&section" had been HTML-entity-garbled to
// "§ion" (&sect; -> §) in this copy of the file.
&section
)
141 SimObject::State so_state
;
142 UNSERIALIZE_ENUM(so_state
);
143 UNSERIALIZE_SCALAR(locked
);
144 BaseSimpleCPU::unserialize(cp
, section
);
145 tickEvent
.unserialize(cp
, csprintf("%s.tickEvent", section
));
// Resume after a drain/switch: no-op when idle or switched out; otherwise
// require the system to be in atomic memory mode, mark this object Running
// and reschedule the tick event if the thread is active.
149 AtomicSimpleCPU::resume()
151 if (_status
== Idle
|| _status
== SwitchedOut
)
154 DPRINTF(SimpleCPU
, "Resume\n");
// The atomic model only works while the system is in atomic memory mode.
155 assert(system
->getMemoryMode() == Enums::atomic
);
157 changeState(SimObject::Running
);
158 if (thread
->status() == ThreadContext::Active
) {
159 if (!tickEvent
.scheduled())
160 schedule(tickEvent
, nextCycle());
// Reset the system-wide instruction count on resume.
162 system
->totalNumInsts
= 0;
// Switch this CPU out (e.g. when handing execution over to a different
// CPU model); only legal from the Running or Idle states.
166 AtomicSimpleCPU::switchOut()
168 assert(_status
== Running
|| _status
== Idle
);
169 _status
= SwitchedOut
;
// Take over execution from another CPU model: inherit base-CPU state,
// schedule a tick for any active thread context, and re-point the
// statically allocated requests at this CPU's id.
176 AtomicSimpleCPU::takeOverFrom(BaseCPU
*oldCPU
)
178 BaseCPU::takeOverFrom(oldCPU
);
// No tick may be pending before the takeover.
180 assert(!tickEvent
.scheduled());
182 // if any of this CPU's ThreadContexts are active, mark the CPU as
183 // running and schedule its tick event.
184 ThreadID size
= threadContexts
.size();
185 for (ThreadID i
= 0; i
< size
; ++i
) {
186 ThreadContext
*tc
= threadContexts
[i
];
187 if (tc
->status() == ThreadContext::Active
&& _status
!= Running
) {
189 schedule(tickEvent
, nextCycle());
// NOTE(review): the body of this branch is not visible in this excerpt;
// presumably it drops the status back to Idle -- confirm.
193 if (_status
!= Running
) {
// Single-threaded operation only; contextId == threadId.
196 assert(threadContexts
.size() == 1);
197 ifetch_req
.setThreadContext(_cpuId
, 0); // Add thread ID if we add MT
198 data_read_req
.setThreadContext(_cpuId
, 0); // Add thread ID here too
199 data_write_req
.setThreadContext(_cpuId
, 0); // Add thread ID here too
// Activate thread 'thread_num' after 'delay' cycles: account the cycles
// spent suspended and schedule the first tick on a cycle boundary.
204 AtomicSimpleCPU::activateContext(ThreadID thread_num
, int delay
)
206 DPRINTF(SimpleCPU
, "ActivateContext %d (%d cycles)\n", thread_num
, delay
);
// Only thread 0 exists on this single-threaded model.
208 assert(thread_num
== 0);
211 assert(_status
== Idle
);
212 assert(!tickEvent
.scheduled());
// Charge the idle time between suspend and activate to the cycle counter.
215 numCycles
+= tickToCycles(thread
->lastActivate
- thread
->lastSuspend
);
217 //Make sure ticks are still on multiples of cycles
218 schedule(tickEvent
, nextCycle(curTick() + ticks(delay
)));
// Suspend thread 'thread_num': stop ticking this CPU. The tick event may
// legitimately be unscheduled already when suspension is triggered from
// inside an executing instruction.
224 AtomicSimpleCPU::suspendContext(ThreadID thread_num
)
226 DPRINTF(SimpleCPU
, "SuspendContext %d\n", thread_num
);
// Only thread 0 exists on this single-threaded model.
228 assert(thread_num
== 0);
234 assert(_status
== Running
);
236 // tick event may not be scheduled if this gets called from inside
237 // an instruction's execution, e.g. "quiesce"
238 if (tickEvent
.scheduled())
239 deschedule(tickEvent
);
// Atomically read 'size' bytes at virtual address 'addr' into 'data',
// splitting the access when it crosses a cache-line boundary. Reuses the
// statically allocated data_read_req to avoid per-access allocation.
// Returns the translation/access fault, NoFault on success.
247 AtomicSimpleCPU::readMem(Addr addr
, uint8_t * data
,
248 unsigned size
, unsigned flags
)
250 // use the CPU's statically allocated read request and packet objects
251 Request
*req
= &data_read_req
;
// Record the target address for exec tracing.
254 traceData
->setAddr(addr
);
257 //The block size of our peer.
258 unsigned blockSize
= dcachePort
.peerBlockSize();
259 //The size of the data we're trying to read.
262 //The address of the second part of this access if it needs to be split
263 //across a cache line boundary.
264 Addr secondAddr
= roundDown(addr
+ size
- 1, blockSize
);
// If the access spans two lines, shrink this request to the first line.
266 if (secondAddr
> addr
)
267 size
= secondAddr
- addr
;
272 req
->setVirt(0, addr
, size
, flags
, dataMasterId(), thread
->pcState().instAddr());
274 // translate to physical address
275 Fault fault
= thread
->dtb
->translateAtomic(req
, tc
, BaseTLB::Read
);
277 // Now do the access.
278 if (fault
== NoFault
&& !req
->getFlags().isSet(Request::NO_ACCESS
)) {
// Load-locked reads use a distinct command so the memory system can
// track the reservation.
279 Packet pkt
= Packet(req
,
280 req
->isLLSC() ? MemCmd::LoadLockedReq
: MemCmd::ReadReq
,
282 pkt
.dataStatic(data
);
// Memory-mapped IPR accesses are handled by the ISA, not the memory
// system.
284 if (req
->isMmappedIpr())
285 dcache_latency
+= TheISA::handleIprRead(thread
->getTC(), &pkt
);
// Fast path straight to physical memory when the address falls in the
// physmem port's cached range; otherwise go through the d-cache port.
287 if (hasPhysMemPort
&& pkt
.getAddr() == physMemAddr
)
288 dcache_latency
+= physmemPort
.sendAtomic(&pkt
);
290 dcache_latency
+= dcachePort
.sendAtomic(&pkt
);
292 dcache_access
= true;
294 assert(!pkt
.isError());
// Let the ISA note a load-locked for LL/SC tracking.
297 TheISA::handleLockedRead(thread
, req
);
301 //If there's a fault, return it
// NOTE(review): faulting prefetches appear to be special-cased here;
// the branch body is not visible in this excerpt.
302 if (fault
!= NoFault
) {
303 if (req
->isPrefetch()) {
310 //If we don't need to access a second cache line, stop now.
311 if (secondAddr
<= addr
)
313 if (req
->isLocked() && fault
== NoFault
) {
321 * Set up for accessing the second cache line.
324 //Move the pointer we're reading into to the correct location.
326 //Adjust the size to get the remaining bytes.
327 size
= addr
+ fullSize
- secondAddr
;
328 //And access the right address.
// Atomically write 'size' bytes from 'data' to virtual address 'addr',
// handling LL/SC store-conditionals, (conditional) swaps, and accesses
// that straddle a cache-line boundary. When 'res' is non-null it receives
// the SC/swap result. Returns the fault, NoFault on success.
335 AtomicSimpleCPU::writeMem(uint8_t *data
, unsigned size
,
336 Addr addr
, unsigned flags
, uint64_t *res
)
338 // use the CPU's statically allocated write request and packet objects
339 Request
*req
= &data_write_req
;
// Record the target address for exec tracing.
342 traceData
->setAddr(addr
);
345 //The block size of our peer.
346 unsigned blockSize
= dcachePort
.peerBlockSize();
347 //The size of the data we're trying to write.
350 //The address of the second part of this access if it needs to be split
351 //across a cache line boundary.
352 Addr secondAddr
= roundDown(addr
+ size
- 1, blockSize
);
// If the access spans two lines, shrink this request to the first line.
354 if(secondAddr
> addr
)
355 size
= secondAddr
- addr
;
360 req
->setVirt(0, addr
, size
, flags
, dataMasterId(), thread
->pcState().instAddr());
362 // translate to physical address
363 Fault fault
= thread
->dtb
->translateAtomic(req
, tc
, BaseTLB::Write
);
365 // Now do the access.
366 if (fault
== NoFault
) {
367 MemCmd cmd
= MemCmd::WriteReq
; // default
368 bool do_access
= true; // flag to suppress cache access
// Store-conditional path (guard not visible in this excerpt): the ISA
// decides via handleLockedWrite whether the access really happens.
371 cmd
= MemCmd::StoreCondReq
;
372 do_access
= TheISA::handleLockedWrite(thread
, req
);
373 } else if (req
->isSwap()) {
374 cmd
= MemCmd::SwapReq
;
// A conditional swap carries the compare value in the request's extra
// data.
375 if (req
->isCondSwap()) {
377 req
->setExtraData(*res
);
381 if (do_access
&& !req
->getFlags().isSet(Request::NO_ACCESS
)) {
382 Packet pkt
= Packet(req
, cmd
, Packet::Broadcast
);
383 pkt
.dataStatic(data
);
// Memory-mapped IPR writes are handled by the ISA, not the memory
// system.
385 if (req
->isMmappedIpr()) {
387 TheISA::handleIprWrite(thread
->getTC(), &pkt
);
// Fast path straight to physical memory when the address falls in the
// physmem port's cached range; otherwise go through the d-cache port.
389 if (hasPhysMemPort
&& pkt
.getAddr() == physMemAddr
)
390 dcache_latency
+= physmemPort
.sendAtomic(&pkt
);
392 dcache_latency
+= dcachePort
.sendAtomic(&pkt
);
394 dcache_access
= true;
395 assert(!pkt
.isError());
// Return the previous memory contents for swaps. NOTE(review): the
// enclosing guard is not visible in this excerpt.
399 memcpy(res
, pkt
.getPtr
<uint8_t>(), fullSize
);
// For non-swap accesses (i.e. store-conditionals) the extra data holds
// the success/failure result.
403 if (res
&& !req
->isSwap()) {
404 *res
= req
->getExtraData();
408 //If there's a fault or we don't need to access a second cache line,
410 if (fault
!= NoFault
|| secondAddr
<= addr
)
412 if (req
->isLocked() && fault
== NoFault
) {
416 if (fault
!= NoFault
&& req
->isPrefetch()) {
424 * Set up for accessing the second cache line.
427 //Move the pointer we're reading into to the correct location.
429 //Adjust the size to get the remaining bytes.
430 size
= addr
+ fullSize
- secondAddr
;
431 //And access the right address.
// Main simulation loop: execute up to 'width' instructions per tick
// (continuing past 'width' while an LL/SC sequence holds 'locked'),
// handling interrupt checks, fetch, execute, stall accounting, and the
// rescheduling of the tick event.
438 AtomicSimpleCPU::tick()
440 DPRINTF(SimpleCPU
, "Tick\n");
444 for (int i
= 0; i
< width
|| locked
; ++i
) {
// Check for interrupts unless the current instruction delays commit.
447 if (!curStaticInst
|| !curStaticInst
->isDelayedCommit())
448 checkForInterrupts();
451 // We must have just got suspended by a PC event
455 Fault fault
= NoFault
;
457 TheISA::PCState pcState
= thread
->pcState();
// Microcoded instructions fetched from the microcode ROM need no
// instruction-memory access.
459 bool needToFetch
= !isRomMicroPC(pcState
.microPC()) &&
// Set up and translate the instruction-fetch request.
462 setupFetchRequest(&ifetch_req
);
463 fault
= thread
->itb
->translateAtomic(&ifetch_req
, tc
,
467 if (fault
== NoFault
) {
468 Tick icache_latency
= 0;
469 bool icache_access
= false;
470 dcache_access
= false; // assume no dcache access
473 // This is commented out because the predecoder would act like
474 // a tiny cache otherwise. It wouldn't be flushed when needed
475 // like the I cache. It should be flushed, and when that works
476 // this code should be uncommented.
477 //Fetch more instruction memory if necessary
478 //if(predecoder.needMoreBytes())
480 icache_access
= true;
481 Packet ifetch_pkt
= Packet(&ifetch_req
, MemCmd::ReadReq
,
483 ifetch_pkt
.dataStatic(&inst
);
// Fetch via the physmem fast path when possible, else the i-cache port.
485 if (hasPhysMemPort
&& ifetch_pkt
.getAddr() == physMemAddr
)
486 icache_latency
= physmemPort
.sendAtomic(&ifetch_pkt
);
488 icache_latency
= icachePort
.sendAtomic(&ifetch_pkt
);
490 assert(!ifetch_pkt
.isError());
492 // ifetch_req is initialized to read the instruction directly
493 // into the CPU object's inst field.
// Execute the decoded instruction; the fault (if any) is recorded.
500 fault
= curStaticInst
->execute(this, traceData
);
502 // keep an instruction count
503 if (fault
== NoFault
)
// Faulting instructions are normally not traced (unless ExecFaulting
// debugging is enabled).
505 else if (traceData
&& !DTRACE(ExecFaulting
)) {
513 // @todo remove me after debugging with legion done
514 if (curStaticInst
&& (!curStaticInst
->isMicroop() ||
515 curStaticInst
->isFirstMicroop()))
// Accumulate modeled stall time from the i-/d-cache latencies when the
// corresponding simulate_*_stalls knobs are enabled.
518 Tick stall_ticks
= 0;
519 if (simulate_inst_stalls
&& icache_access
)
520 stall_ticks
+= icache_latency
;
522 if (simulate_data_stalls
&& dcache_access
)
523 stall_ticks
+= dcache_latency
;
// Round the stall time up to a whole number of CPU cycles.
526 Tick stall_cycles
= stall_ticks
/ ticks(1);
527 Tick aligned_stall_ticks
= ticks(stall_cycles
);
529 if (aligned_stall_ticks
< stall_ticks
)
530 aligned_stall_ticks
+= 1;
532 latency
+= aligned_stall_ticks
;
// Advance past the instruction unless it faulted or must stay at the
// same PC (e.g. an unfinished microcode sequence).
536 if(fault
!= NoFault
|| !stayAtPC
)
540 // instruction takes at least one cycle
541 if (latency
< ticks(1))
// Reschedule the next tick after the accumulated latency.
545 schedule(tickEvent
, curTick() + latency
);
// Debug helper: print how the given address maps through the data port.
550 AtomicSimpleCPU::printAddr(Addr a
)
552 dcachePort
.printAddr(a
);
556 ////////////////////////////////////////////////////////////////////////
558 // AtomicSimpleCPU Simulation Object
// Python-config factory: build an AtomicSimpleCPU from its parameter
// object. Syscall-emulation mode requires exactly one workload per CPU.
561 AtomicSimpleCPUParams::create()
564 if (!FullSystem
&& workload
.size() != 1)
565 panic("only one workload allowed");
566 return new AtomicSimpleCPU(this);