/*
 * Copyright 2014 Google, Inc.
 * Copyright (c) 2012-2013,2015 ARM Limited
 * All rights reserved.
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder. You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Copyright (c) 2002-2005 The Regents of The University of Michigan
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Steve Reinhardt
 */
#include "cpu/simple/atomic.hh"

#include <algorithm>
#include <cstring>
#include <memory>
#include <utility>

#include "arch/locked_mem.hh"
#include "arch/mmapped_ipr.hh"
#include "arch/utility.hh"
#include "base/bigint.hh"
#include "base/output.hh"
#include "config/the_isa.hh"
#include "cpu/exetrace.hh"
#include "debug/Drain.hh"
#include "debug/ExecFaulting.hh"
#include "debug/SimpleCPU.hh"
#include "mem/packet.hh"
#include "mem/packet_access.hh"
#include "mem/physical.hh"
#include "params/AtomicSimpleCPU.hh"
#include "sim/faults.hh"
#include "sim/full_system.hh"
#include "sim/system.hh"
65 using namespace TheISA
;
67 AtomicSimpleCPU::TickEvent::TickEvent(AtomicSimpleCPU
*c
)
68 : Event(CPU_Tick_Pri
), cpu(c
)
74 AtomicSimpleCPU::TickEvent::process()
80 AtomicSimpleCPU::TickEvent::description() const
82 return "AtomicSimpleCPU tick";
86 AtomicSimpleCPU::init()
88 BaseSimpleCPU::init();
90 int cid
= threadContexts
[0]->contextId();
91 ifetch_req
.setContext(cid
);
92 data_read_req
.setContext(cid
);
93 data_write_req
.setContext(cid
);
96 AtomicSimpleCPU::AtomicSimpleCPU(AtomicSimpleCPUParams
*p
)
97 : BaseSimpleCPU(p
), tickEvent(this), width(p
->width
), locked(false),
98 simulate_data_stalls(p
->simulate_data_stalls
),
99 simulate_inst_stalls(p
->simulate_inst_stalls
),
100 icachePort(name() + ".icache_port", this),
101 dcachePort(name() + ".dcache_port", this),
102 fastmem(p
->fastmem
), dcache_access(false), dcache_latency(0),
109 AtomicSimpleCPU::~AtomicSimpleCPU()
111 if (tickEvent
.scheduled()) {
112 deschedule(tickEvent
);
117 AtomicSimpleCPU::drain()
120 return DrainState::Drained
;
123 DPRINTF(Drain
, "Requesting drain.\n");
124 return DrainState::Draining
;
126 if (tickEvent
.scheduled())
127 deschedule(tickEvent
);
129 activeThreads
.clear();
130 DPRINTF(Drain
, "Not executing microcode, no need to drain.\n");
131 return DrainState::Drained
;
136 AtomicSimpleCPU::threadSnoop(PacketPtr pkt
, ThreadID sender
)
138 DPRINTF(SimpleCPU
, "received snoop pkt for addr:%#x %s\n", pkt
->getAddr(),
141 for (ThreadID tid
= 0; tid
< numThreads
; tid
++) {
143 if (getCpuAddrMonitor(tid
)->doMonitor(pkt
)) {
147 TheISA::handleLockedSnoop(threadInfo
[tid
]->thread
,
148 pkt
, dcachePort
.cacheBlockMask
);
154 AtomicSimpleCPU::drainResume()
156 assert(!tickEvent
.scheduled());
160 DPRINTF(SimpleCPU
, "Resume\n");
163 assert(!threadContexts
.empty());
165 _status
= BaseSimpleCPU::Idle
;
167 for (ThreadID tid
= 0; tid
< numThreads
; tid
++) {
168 if (threadInfo
[tid
]->thread
->status() == ThreadContext::Active
) {
169 threadInfo
[tid
]->notIdleFraction
= 1;
170 activeThreads
.push_back(tid
);
171 _status
= BaseSimpleCPU::Running
;
173 // Tick if any threads active
174 if (!tickEvent
.scheduled()) {
175 schedule(tickEvent
, nextCycle());
178 threadInfo
[tid
]->notIdleFraction
= 0;
184 AtomicSimpleCPU::tryCompleteDrain()
186 if (drainState() != DrainState::Draining
)
189 DPRINTF(Drain
, "tryCompleteDrain.\n");
193 DPRINTF(Drain
, "CPU done draining, processing drain event\n");
201 AtomicSimpleCPU::switchOut()
203 BaseSimpleCPU::switchOut();
205 assert(!tickEvent
.scheduled());
206 assert(_status
== BaseSimpleCPU::Running
|| _status
== Idle
);
212 AtomicSimpleCPU::takeOverFrom(BaseCPU
*oldCPU
)
214 BaseSimpleCPU::takeOverFrom(oldCPU
);
216 // The tick event should have been descheduled by drain()
217 assert(!tickEvent
.scheduled());
221 AtomicSimpleCPU::verifyMemoryMode() const
223 if (!system
->isAtomicMode()) {
224 fatal("The atomic CPU requires the memory system to be in "
230 AtomicSimpleCPU::activateContext(ThreadID thread_num
)
232 DPRINTF(SimpleCPU
, "ActivateContext %d\n", thread_num
);
234 assert(thread_num
< numThreads
);
236 threadInfo
[thread_num
]->notIdleFraction
= 1;
237 Cycles delta
= ticksToCycles(threadInfo
[thread_num
]->thread
->lastActivate
-
238 threadInfo
[thread_num
]->thread
->lastSuspend
);
240 ppCycles
->notify(delta
);
242 if (!tickEvent
.scheduled()) {
243 //Make sure ticks are still on multiples of cycles
244 schedule(tickEvent
, clockEdge(Cycles(0)));
246 _status
= BaseSimpleCPU::Running
;
247 if (std::find(activeThreads
.begin(), activeThreads
.end(), thread_num
)
248 == activeThreads
.end()) {
249 activeThreads
.push_back(thread_num
);
252 BaseCPU::activateContext(thread_num
);
257 AtomicSimpleCPU::suspendContext(ThreadID thread_num
)
259 DPRINTF(SimpleCPU
, "SuspendContext %d\n", thread_num
);
261 assert(thread_num
< numThreads
);
262 activeThreads
.remove(thread_num
);
267 assert(_status
== BaseSimpleCPU::Running
);
269 threadInfo
[thread_num
]->notIdleFraction
= 0;
271 if (activeThreads
.empty()) {
274 if (tickEvent
.scheduled()) {
275 deschedule(tickEvent
);
279 BaseCPU::suspendContext(thread_num
);
284 AtomicSimpleCPU::AtomicCPUDPort::recvAtomicSnoop(PacketPtr pkt
)
286 DPRINTF(SimpleCPU
, "received snoop pkt for addr:%#x %s\n", pkt
->getAddr(),
289 // X86 ISA: Snooping an invalidation for monitor/mwait
290 AtomicSimpleCPU
*cpu
= (AtomicSimpleCPU
*)(&owner
);
292 for (ThreadID tid
= 0; tid
< cpu
->numThreads
; tid
++) {
293 if (cpu
->getCpuAddrMonitor(tid
)->doMonitor(pkt
)) {
298 // if snoop invalidates, release any associated locks
299 // When run without caches, Invalidation packets will not be received
300 // hence we must check if the incoming packets are writes and wakeup
301 // the processor accordingly
302 if (pkt
->isInvalidate() || pkt
->isWrite()) {
303 DPRINTF(SimpleCPU
, "received invalidation for addr:%#x\n",
305 for (auto &t_info
: cpu
->threadInfo
) {
306 TheISA::handleLockedSnoop(t_info
->thread
, pkt
, cacheBlockMask
);
314 AtomicSimpleCPU::AtomicCPUDPort::recvFunctionalSnoop(PacketPtr pkt
)
316 DPRINTF(SimpleCPU
, "received snoop pkt for addr:%#x %s\n", pkt
->getAddr(),
319 // X86 ISA: Snooping an invalidation for monitor/mwait
320 AtomicSimpleCPU
*cpu
= (AtomicSimpleCPU
*)(&owner
);
321 for (ThreadID tid
= 0; tid
< cpu
->numThreads
; tid
++) {
322 if (cpu
->getCpuAddrMonitor(tid
)->doMonitor(pkt
)) {
327 // if snoop invalidates, release any associated locks
328 if (pkt
->isInvalidate()) {
329 DPRINTF(SimpleCPU
, "received invalidation for addr:%#x\n",
331 for (auto &t_info
: cpu
->threadInfo
) {
332 TheISA::handleLockedSnoop(t_info
->thread
, pkt
, cacheBlockMask
);
338 AtomicSimpleCPU::readMem(Addr addr
, uint8_t * data
, unsigned size
,
339 Request::Flags flags
)
341 SimpleExecContext
& t_info
= *threadInfo
[curThread
];
342 SimpleThread
* thread
= t_info
.thread
;
344 // use the CPU's statically allocated read request and packet objects
345 Request
*req
= &data_read_req
;
348 traceData
->setMem(addr
, size
, flags
);
350 //The size of the data we're trying to read.
353 //The address of the second part of this access if it needs to be split
354 //across a cache line boundary.
355 Addr secondAddr
= roundDown(addr
+ size
- 1, cacheLineSize());
357 if (secondAddr
> addr
)
358 size
= secondAddr
- addr
;
362 req
->taskId(taskId());
364 req
->setVirt(0, addr
, size
, flags
, dataMasterId(), thread
->pcState().instAddr());
366 // translate to physical address
367 Fault fault
= thread
->dtb
->translateAtomic(req
, thread
->getTC(),
370 // Now do the access.
371 if (fault
== NoFault
&& !req
->getFlags().isSet(Request::NO_ACCESS
)) {
372 Packet
pkt(req
, Packet::makeReadCmd(req
));
373 pkt
.dataStatic(data
);
375 if (req
->isMmappedIpr())
376 dcache_latency
+= TheISA::handleIprRead(thread
->getTC(), &pkt
);
378 if (fastmem
&& system
->isMemAddr(pkt
.getAddr()))
379 system
->getPhysMem().access(&pkt
);
381 dcache_latency
+= dcachePort
.sendAtomic(&pkt
);
383 dcache_access
= true;
385 assert(!pkt
.isError());
388 TheISA::handleLockedRead(thread
, req
);
392 //If there's a fault, return it
393 if (fault
!= NoFault
) {
394 if (req
->isPrefetch()) {
401 //If we don't need to access a second cache line, stop now.
402 if (secondAddr
<= addr
)
404 if (req
->isLockedRMW() && fault
== NoFault
) {
413 * Set up for accessing the second cache line.
416 //Move the pointer we're reading into to the correct location.
418 //Adjust the size to get the remaining bytes.
419 size
= addr
+ fullSize
- secondAddr
;
420 //And access the right address.
426 AtomicSimpleCPU::initiateMemRead(Addr addr
, unsigned size
,
427 Request::Flags flags
)
429 panic("initiateMemRead() is for timing accesses, and should "
430 "never be called on AtomicSimpleCPU.\n");
434 AtomicSimpleCPU::writeMem(uint8_t *data
, unsigned size
, Addr addr
,
435 Request::Flags flags
, uint64_t *res
)
437 SimpleExecContext
& t_info
= *threadInfo
[curThread
];
438 SimpleThread
* thread
= t_info
.thread
;
439 static uint8_t zero_array
[64] = {};
443 assert(flags
& Request::CACHE_BLOCK_ZERO
);
444 // This must be a cache block cleaning request
448 // use the CPU's statically allocated write request and packet objects
449 Request
*req
= &data_write_req
;
452 traceData
->setMem(addr
, size
, flags
);
454 //The size of the data we're trying to read.
457 //The address of the second part of this access if it needs to be split
458 //across a cache line boundary.
459 Addr secondAddr
= roundDown(addr
+ size
- 1, cacheLineSize());
461 if (secondAddr
> addr
)
462 size
= secondAddr
- addr
;
466 req
->taskId(taskId());
468 req
->setVirt(0, addr
, size
, flags
, dataMasterId(), thread
->pcState().instAddr());
470 // translate to physical address
471 Fault fault
= thread
->dtb
->translateAtomic(req
, thread
->getTC(), BaseTLB::Write
);
473 // Now do the access.
474 if (fault
== NoFault
) {
475 MemCmd cmd
= MemCmd::WriteReq
; // default
476 bool do_access
= true; // flag to suppress cache access
479 cmd
= MemCmd::StoreCondReq
;
480 do_access
= TheISA::handleLockedWrite(thread
, req
, dcachePort
.cacheBlockMask
);
481 } else if (req
->isSwap()) {
482 cmd
= MemCmd::SwapReq
;
483 if (req
->isCondSwap()) {
485 req
->setExtraData(*res
);
489 if (do_access
&& !req
->getFlags().isSet(Request::NO_ACCESS
)) {
490 Packet pkt
= Packet(req
, cmd
);
491 pkt
.dataStatic(data
);
493 if (req
->isMmappedIpr()) {
495 TheISA::handleIprWrite(thread
->getTC(), &pkt
);
497 if (fastmem
&& system
->isMemAddr(pkt
.getAddr()))
498 system
->getPhysMem().access(&pkt
);
500 dcache_latency
+= dcachePort
.sendAtomic(&pkt
);
502 // Notify other threads on this CPU of write
503 threadSnoop(&pkt
, curThread
);
505 dcache_access
= true;
506 assert(!pkt
.isError());
510 memcpy(res
, pkt
.getConstPtr
<uint8_t>(), fullSize
);
514 if (res
&& !req
->isSwap()) {
515 *res
= req
->getExtraData();
519 //If there's a fault or we don't need to access a second cache line,
521 if (fault
!= NoFault
|| secondAddr
<= addr
)
523 if (req
->isLockedRMW() && fault
== NoFault
) {
529 if (fault
!= NoFault
&& req
->isPrefetch()) {
537 * Set up for accessing the second cache line.
540 //Move the pointer we're reading into to the correct location.
542 //Adjust the size to get the remaining bytes.
543 size
= addr
+ fullSize
- secondAddr
;
544 //And access the right address.
551 AtomicSimpleCPU::tick()
553 DPRINTF(SimpleCPU
, "Tick\n");
555 // Change thread if multi-threaded
558 // Set memroy request ids to current thread
559 if (numThreads
> 1) {
560 ContextID cid
= threadContexts
[curThread
]->contextId();
562 ifetch_req
.setContext(cid
);
563 data_read_req
.setContext(cid
);
564 data_write_req
.setContext(cid
);
567 SimpleExecContext
& t_info
= *threadInfo
[curThread
];
568 SimpleThread
* thread
= t_info
.thread
;
572 for (int i
= 0; i
< width
|| locked
; ++i
) {
576 if (!curStaticInst
|| !curStaticInst
->isDelayedCommit()) {
577 checkForInterrupts();
581 // We must have just got suspended by a PC event
582 if (_status
== Idle
) {
587 Fault fault
= NoFault
;
589 TheISA::PCState pcState
= thread
->pcState();
591 bool needToFetch
= !isRomMicroPC(pcState
.microPC()) &&
594 ifetch_req
.taskId(taskId());
595 setupFetchRequest(&ifetch_req
);
596 fault
= thread
->itb
->translateAtomic(&ifetch_req
, thread
->getTC(),
600 if (fault
== NoFault
) {
601 Tick icache_latency
= 0;
602 bool icache_access
= false;
603 dcache_access
= false; // assume no dcache access
606 // This is commented out because the decoder would act like
607 // a tiny cache otherwise. It wouldn't be flushed when needed
608 // like the I cache. It should be flushed, and when that works
609 // this code should be uncommented.
610 //Fetch more instruction memory if necessary
611 //if (decoder.needMoreBytes())
613 icache_access
= true;
614 Packet ifetch_pkt
= Packet(&ifetch_req
, MemCmd::ReadReq
);
615 ifetch_pkt
.dataStatic(&inst
);
617 if (fastmem
&& system
->isMemAddr(ifetch_pkt
.getAddr()))
618 system
->getPhysMem().access(&ifetch_pkt
);
620 icache_latency
= icachePort
.sendAtomic(&ifetch_pkt
);
622 assert(!ifetch_pkt
.isError());
624 // ifetch_req is initialized to read the instruction directly
625 // into the CPU object's inst field.
631 Tick stall_ticks
= 0;
633 fault
= curStaticInst
->execute(&t_info
, traceData
);
635 // keep an instruction count
636 if (fault
== NoFault
) {
638 ppCommit
->notify(std::make_pair(thread
, curStaticInst
));
640 else if (traceData
&& !DTRACE(ExecFaulting
)) {
645 if (dynamic_pointer_cast
<SyscallRetryFault
>(fault
)) {
646 // Retry execution of system calls after a delay.
647 // Prevents immediate re-execution since conditions which
648 // caused the retry are unlikely to change every tick.
649 stall_ticks
+= clockEdge(syscallRetryLatency
) - curTick();
655 // @todo remove me after debugging with legion done
656 if (curStaticInst
&& (!curStaticInst
->isMicroop() ||
657 curStaticInst
->isFirstMicroop()))
660 if (simulate_inst_stalls
&& icache_access
)
661 stall_ticks
+= icache_latency
;
663 if (simulate_data_stalls
&& dcache_access
)
664 stall_ticks
+= dcache_latency
;
667 // the atomic cpu does its accounting in ticks, so
668 // keep counting in ticks but round to the clock
670 latency
+= divCeil(stall_ticks
, clockPeriod()) *
675 if (fault
!= NoFault
|| !t_info
.stayAtPC
)
679 if (tryCompleteDrain())
682 // instruction takes at least one cycle
683 if (latency
< clockPeriod())
684 latency
= clockPeriod();
687 reschedule(tickEvent
, curTick() + latency
, true);
691 AtomicSimpleCPU::regProbePoints()
693 BaseCPU::regProbePoints();
695 ppCommit
= new ProbePointArg
<pair
<SimpleThread
*, const StaticInstPtr
>>
696 (getProbeManager(), "Commit");
700 AtomicSimpleCPU::printAddr(Addr a
)
702 dcachePort
.printAddr(a
);
////////////////////////////////////////////////////////////////////////
//
//  AtomicSimpleCPU Simulation Object
//
710 AtomicSimpleCPUParams::create()
712 return new AtomicSimpleCPU(this);