2 * Copyright 2014 Google, Inc.
3 * Copyright (c) 2012-2013,2015,2017-2020 ARM Limited
6 * The license below extends only to copyright in the software and shall
7 * not be construed as granting a license to any other intellectual
8 * property including but not limited to intellectual property relating
9 * to a hardware implementation of the functionality of the software
10 * licensed hereunder. You may use the software subject to the license
11 * terms below provided that you ensure that this notice is replicated
12 * unmodified and in its entirety in all distributions of the software,
13 * modified or unmodified, in source code or in binary form.
15 * Copyright (c) 2002-2005 The Regents of The University of Michigan
16 * All rights reserved.
18 * Redistribution and use in source and binary forms, with or without
19 * modification, are permitted provided that the following conditions are
20 * met: redistributions of source code must retain the above copyright
21 * notice, this list of conditions and the following disclaimer;
22 * redistributions in binary form must reproduce the above copyright
23 * notice, this list of conditions and the following disclaimer in the
24 * documentation and/or other materials provided with the distribution;
25 * neither the name of the copyright holders nor the names of its
26 * contributors may be used to endorse or promote products derived from
27 * this software without specific prior written permission.
29 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
30 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
31 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
32 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
33 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
34 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
35 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
36 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
37 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
38 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
39 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
42 #include "cpu/simple/atomic.hh"
44 #include "arch/locked_mem.hh"
45 #include "arch/utility.hh"
46 #include "base/output.hh"
47 #include "config/the_isa.hh"
48 #include "cpu/exetrace.hh"
49 #include "cpu/utils.hh"
50 #include "debug/Drain.hh"
51 #include "debug/ExecFaulting.hh"
52 #include "debug/SimpleCPU.hh"
53 #include "mem/packet.hh"
54 #include "mem/packet_access.hh"
55 #include "mem/physical.hh"
56 #include "params/AtomicSimpleCPU.hh"
57 #include "sim/faults.hh"
58 #include "sim/full_system.hh"
59 #include "sim/system.hh"
62 using namespace TheISA
;
65 AtomicSimpleCPU::init()
67 BaseSimpleCPU::init();
69 int cid
= threadContexts
[0]->contextId();
70 ifetch_req
->setContext(cid
);
71 data_read_req
->setContext(cid
);
72 data_write_req
->setContext(cid
);
73 data_amo_req
->setContext(cid
);
// Constructor: wires up the tick event, copies the execution width and the
// stall-simulation knobs from the params object, and names the I/D cache
// ports after this CPU instance.
// NOTE(review): this extract has elided lines (gaps in the original
// numbering, e.g. the base-class initializer before "tickEvent(...)") —
// confirm against the full file.
76 AtomicSimpleCPU::AtomicSimpleCPU(AtomicSimpleCPUParams
*p
)
78 tickEvent([this]{ tick(); }, "AtomicSimpleCPU tick",
79 false, Event::CPU_Tick_Pri
),
80 width(p
->width
), locked(false),
81 simulate_data_stalls(p
->simulate_data_stalls
),
82 simulate_inst_stalls(p
->simulate_inst_stalls
),
83 icachePort(name() + ".icache_port", this),
84 dcachePort(name() + ".dcache_port", this),
85 dcache_access(false), dcache_latency(0),
// Pre-allocate one Request object per access type; they are reused for
// every access (init() and tick() set their context IDs).
89 ifetch_req
= std::make_shared
<Request
>();
90 data_read_req
= std::make_shared
<Request
>();
91 data_write_req
= std::make_shared
<Request
>();
92 data_amo_req
= std::make_shared
<Request
>();
96 AtomicSimpleCPU::~AtomicSimpleCPU()
98 if (tickEvent
.scheduled()) {
99 deschedule(tickEvent
);
// Drain hook: stop ticking and report whether the CPU is already quiesced.
// Returns Drained when nothing more is needed, Draining when the CPU is
// mid-instruction and will signal completion later (see tryCompleteDrain).
104 AtomicSimpleCPU::drain()
106 // Deschedule any power gating event (if any)
107 deschedulePowerGatingEvent();
// NOTE(review): a guard appears to have been elided before this early
// return (original lines 108-111 are missing from this extract) — confirm
// against the full file.
110 return DrainState::Drained
;
// Mid-instruction (e.g. microcode in flight): finish first, signal later.
112 if (!isCpuDrained()) {
113 DPRINTF(Drain
, "Requesting drain.\n");
114 return DrainState::Draining
;
// Already quiesced: cancel the tick event and clear the active-thread list.
116 if (tickEvent
.scheduled())
117 deschedule(tickEvent
);
119 activeThreads
.clear();
120 DPRINTF(Drain
, "Not executing microcode, no need to drain.\n");
121 return DrainState::Drained
;
// Forward a packet produced by one of this CPU's own hardware threads to
// the other threads' address monitors and LL/SC bookkeeping.
// NOTE(review): lines are elided in this extract (gaps in the original
// numbering), including whatever guards the handleLockedSnoop call and any
// use of `sender` — confirm against the full file.
126 AtomicSimpleCPU::threadSnoop(PacketPtr pkt
, ThreadID sender
)
128 DPRINTF(SimpleCPU
, "%s received snoop pkt for addr:%#x %s\n",
129 __func__
, pkt
->getAddr(), pkt
->cmdString());
// Check every hardware thread's address monitor against this packet.
131 for (ThreadID tid
= 0; tid
< numThreads
; tid
++) {
133 if (getCpuAddrMonitor(tid
)->doMonitor(pkt
)) {
// Clear LL/SC reservations that match the snooped cache block.
137 TheISA::handleLockedSnoop(threadInfo
[tid
]->thread
,
138 pkt
, dcachePort
.cacheBlockMask
);
// Resume after a drain: recompute which threads are active, restart the
// tick event if any are, and mark the rest idle.
// NOTE(review): guard lines (e.g. a switched-out check) appear elided in
// this extract — confirm against the full file.
144 AtomicSimpleCPU::drainResume()
146 assert(!tickEvent
.scheduled());
150 DPRINTF(SimpleCPU
, "Resume\n");
153 assert(!threadContexts
.empty());
// Assume idle until an active thread is found in the loop below.
155 _status
= BaseSimpleCPU::Idle
;
157 for (ThreadID tid
= 0; tid
< numThreads
; tid
++) {
158 if (threadInfo
[tid
]->thread
->status() == ThreadContext::Active
) {
159 threadInfo
[tid
]->notIdleFraction
= 1;
160 activeThreads
.push_back(tid
);
161 _status
= BaseSimpleCPU::Running
;
163 // Tick if any threads active
164 if (!tickEvent
.scheduled()) {
165 schedule(tickEvent
, nextCycle());
// Inactive thread: contributes nothing to the not-idle statistic.
168 threadInfo
[tid
]->notIdleFraction
= 0;
172 // Reschedule any power gating event (if any)
173 schedulePowerGatingEvent();
// If a drain is pending and the CPU has quiesced, signal drain completion.
// NOTE(review): the return statements and the isCpuDrained() check are
// elided in this extract — only the state check and DPRINTFs remain.
177 AtomicSimpleCPU::tryCompleteDrain()
179 if (drainState() != DrainState::Draining
)
182 DPRINTF(Drain
, "tryCompleteDrain.\n");
186 DPRINTF(Drain
, "CPU done draining, processing drain event\n");
194 AtomicSimpleCPU::switchOut()
196 BaseSimpleCPU::switchOut();
198 assert(!tickEvent
.scheduled());
199 assert(_status
== BaseSimpleCPU::Running
|| _status
== Idle
);
200 assert(isCpuDrained());
205 AtomicSimpleCPU::takeOverFrom(BaseCPU
*oldCPU
)
207 BaseSimpleCPU::takeOverFrom(oldCPU
);
209 // The tick event should have been descheduled by drain()
210 assert(!tickEvent
.scheduled());
// Startup sanity check: the atomic CPU can only be used with a memory
// system configured for atomic (non-timing) accesses.
// NOTE(review): the tail of the fatal() message literal is elided in this
// extract — confirm the full string against the upstream file.
214 AtomicSimpleCPU::verifyMemoryMode() const
216 if (!system
->isAtomicMode()) {
217 fatal("The atomic CPU requires the memory system to be in "
// Mark a hardware thread runnable: restart the tick event if necessary and
// add the thread to the active list (at most once).
223 AtomicSimpleCPU::activateContext(ThreadID thread_num
)
225 DPRINTF(SimpleCPU
, "ActivateContext %d\n", thread_num
);
227 assert(thread_num
< numThreads
);
229 threadInfo
[thread_num
]->notIdleFraction
= 1;
// Cycles the thread spent suspended (lastActivate - lastSuspend).
// NOTE(review): the statement that consumes `delta` (presumably a cycle
// counter update) is elided in this extract — confirm upstream.
230 Cycles delta
= ticksToCycles(threadInfo
[thread_num
]->thread
->lastActivate
-
231 threadInfo
[thread_num
]->thread
->lastSuspend
);
234 if (!tickEvent
.scheduled()) {
235 //Make sure ticks are still on multiples of cycles
236 schedule(tickEvent
, clockEdge(Cycles(0)));
238 _status
= BaseSimpleCPU::Running
;
// Only add the thread once, even if activated repeatedly.
239 if (std::find(activeThreads
.begin(), activeThreads
.end(), thread_num
)
240 == activeThreads
.end()) {
241 activeThreads
.push_back(thread_num
);
244 BaseCPU::activateContext(thread_num
);
// Suspend a hardware thread: drop it from the active list and, when no
// runnable threads remain, stop the tick event.
249 AtomicSimpleCPU::suspendContext(ThreadID thread_num
)
251 DPRINTF(SimpleCPU
, "SuspendContext %d\n", thread_num
);
253 assert(thread_num
< numThreads
);
254 activeThreads
.remove(thread_num
);
259 assert(_status
== BaseSimpleCPU::Running
);
261 threadInfo
[thread_num
]->notIdleFraction
= 0;
// Last runnable thread gone: stop ticking until a context is reactivated.
263 if (activeThreads
.empty()) {
266 if (tickEvent
.scheduled()) {
267 deschedule(tickEvent
);
271 BaseCPU::suspendContext(thread_num
);
275 AtomicSimpleCPU::sendPacket(RequestPort
&port
, const PacketPtr
&pkt
)
277 return port
.sendAtomic(pkt
);
// Atomic-mode snoop from the memory system: feed the packet to each
// thread's address monitor (monitor/mwait) and, on writes or
// invalidations, clear matching LL/SC reservations on every thread.
281 AtomicSimpleCPU::AtomicCPUDPort::recvAtomicSnoop(PacketPtr pkt
)
283 DPRINTF(SimpleCPU
, "%s received atomic snoop pkt for addr:%#x %s\n",
284 __func__
, pkt
->getAddr(), pkt
->cmdString());
286 // X86 ISA: Snooping an invalidation for monitor/mwait
// The port's owner is the enclosing AtomicSimpleCPU.
287 AtomicSimpleCPU
*cpu
= (AtomicSimpleCPU
*)(&owner
);
289 for (ThreadID tid
= 0; tid
< cpu
->numThreads
; tid
++) {
290 if (cpu
->getCpuAddrMonitor(tid
)->doMonitor(pkt
)) {
295 // if snoop invalidates, release any associated locks
296 // When run without caches, Invalidation packets will not be received
297 // hence we must check if the incoming packets are writes and wakeup
298 // the processor accordingly
299 if (pkt
->isInvalidate() || pkt
->isWrite()) {
300 DPRINTF(SimpleCPU
, "received invalidation for addr:%#x\n",
302 for (auto &t_info
: cpu
->threadInfo
) {
303 TheISA::handleLockedSnoop(t_info
->thread
, pkt
, cacheBlockMask
);
// Functional (debug) snoop path: same monitor and LL/SC bookkeeping as
// recvAtomicSnoop, but only invalidations release reservations here.
311 AtomicSimpleCPU::AtomicCPUDPort::recvFunctionalSnoop(PacketPtr pkt
)
313 DPRINTF(SimpleCPU
, "%s received functional snoop pkt for addr:%#x %s\n",
314 __func__
, pkt
->getAddr(), pkt
->cmdString());
316 // X86 ISA: Snooping an invalidation for monitor/mwait
// The port's owner is the enclosing AtomicSimpleCPU.
317 AtomicSimpleCPU
*cpu
= (AtomicSimpleCPU
*)(&owner
);
318 for (ThreadID tid
= 0; tid
< cpu
->numThreads
; tid
++) {
319 if (cpu
->getCpuAddrMonitor(tid
)->doMonitor(pkt
)) {
324 // if snoop invalidates, release any associated locks
325 if (pkt
->isInvalidate()) {
326 DPRINTF(SimpleCPU
, "received invalidation for addr:%#x\n",
328 for (auto &t_info
: cpu
->threadInfo
) {
329 TheISA::handleLockedSnoop(t_info
->thread
, pkt
, cacheBlockMask
);
// Build the Request for one cache-line-bounded fragment of a larger
// access: frag_size is clamped at the next line boundary, size_left is
// decremented, and the fragment's byte-enable slice (if any) is attached.
// NOTE(review): the return statements and the second std::min operand are
// elided in this extract — confirm against the full file.
335 AtomicSimpleCPU::genMemFragmentRequest(const RequestPtr
& req
, Addr frag_addr
,
336 int size
, Request::Flags flags
,
337 const std::vector
<bool>& byte_enable
,
338 int& frag_size
, int& size_left
) const
340 bool predicate
= true;
// PC of the instruction performing the access (recorded in the request).
341 Addr inst_addr
= threadInfo
[curThread
]->thread
->pcState().instAddr();
// Clamp the fragment so it never crosses a cache-line boundary.
343 frag_size
= std::min(
344 cacheLineSize() - addrBlockOffset(frag_addr
, cacheLineSize()),
346 size_left
-= frag_size
;
348 if (!byte_enable
.empty()) {
349 // Set up byte-enable mask for the current fragment
350 auto it_start
= byte_enable
.begin() + (size
- (frag_size
+ size_left
));
351 auto it_end
= byte_enable
.begin() + (size
- size_left
);
// Only issue the fragment if at least one byte is enabled in its slice.
352 if (isAnyActiveElement(it_start
, it_end
)) {
353 req
->setVirt(frag_addr
, frag_size
, flags
, dataRequestorId(),
355 req
->setByteEnable(std::vector
<bool>(it_start
, it_end
));
// No byte-enable mask supplied: plain full-width fragment.
360 req
->setVirt(frag_addr
, frag_size
, flags
, dataRequestorId(),
362 req
->setByteEnable(std::vector
<bool>());
// Atomic read of `size` bytes at virtual address `addr` into `data`,
// split into cache-line fragments: each fragment is built via
// genMemFragmentRequest, translated, and accessed through the d-cache
// port (or a local accessor), accumulating latency into dcache_latency.
// Returns the first fault encountered, or NoFault.
// NOTE(review): this extract has elided lines (fragment-loop header,
// declarations of predicate/frag_size, several closing braces) — the code
// below is fragmentary.
369 AtomicSimpleCPU::readMem(Addr addr
, uint8_t * data
, unsigned size
,
370 Request::Flags flags
,
371 const std::vector
<bool>& byte_enable
)
373 SimpleExecContext
& t_info
= *threadInfo
[curThread
];
374 SimpleThread
* thread
= t_info
.thread
;
376 // use the CPU's statically allocated read request and packet objects
377 const RequestPtr
&req
= data_read_req
;
// Record the access parameters in the exec trace.
380 traceData
->setMem(addr
, size
, flags
);
384 req
->taskId(taskId());
386 Addr frag_addr
= addr
;
388 int size_left
= size
;
390 Fault fault
= NoFault
;
// Per-fragment work (the loop header is elided in this extract):
393 predicate
= genMemFragmentRequest(req
, frag_addr
, size
, flags
,
394 byte_enable
, frag_size
, size_left
);
396 // translate to physical address
398 fault
= thread
->dtb
->translateAtomic(req
, thread
->getTC(),
402 // Now do the access.
403 if (predicate
&& fault
== NoFault
&&
404 !req
->getFlags().isSet(Request::NO_ACCESS
)) {
405 Packet
pkt(req
, Packet::makeReadCmd(req
));
406 pkt
.dataStatic(data
);
// Local (CPU-internal) accesses bypass the port; both paths add their
// latency into dcache_latency.
408 if (req
->isLocalAccess()) {
409 dcache_latency
+= req
->localAccessor(thread
->getTC(), &pkt
);
411 dcache_latency
+= sendPacket(dcachePort
, &pkt
);
413 dcache_access
= true;
415 assert(!pkt
.isError());
// Record an LL/SC reservation for load-linked reads.
418 TheISA::handleLockedRead(thread
, req
);
422 //If there's a fault, return it
423 if (fault
!= NoFault
) {
424 if (req
->isPrefetch()) {
431 // If we don't need to access further cache lines, stop now.
432 if (size_left
== 0) {
433 if (req
->isLockedRMW() && fault
== NoFault
) {
441 * Set up for accessing the next cache line.
443 frag_addr
+= frag_size
;
445 //Move the pointer we're reading into to the correct location.
// Atomic write of `size` bytes from `data` to virtual address `addr`,
// fragment by fragment; handles store-conditional (LL/SC), swap, and
// cache-block-clean (STORE_NO_DATA) requests. `res` receives the SC/swap
// result. Returns the first fault encountered, or NoFault.
// NOTE(review): elided lines (fragment-loop header, several guards and
// closing braces) make this extract fragmentary.
451 AtomicSimpleCPU::writeMem(uint8_t *data
, unsigned size
, Addr addr
,
452 Request::Flags flags
, uint64_t *res
,
453 const std::vector
<bool>& byte_enable
)
455 SimpleExecContext
& t_info
= *threadInfo
[curThread
];
456 SimpleThread
* thread
= t_info
.thread
;
// All-zero buffer — presumably the data source for STORE_NO_DATA stores;
// the use site is elided in this extract, confirm upstream.
457 static uint8_t zero_array
[64] = {};
461 assert(flags
& Request::STORE_NO_DATA
);
462 // This must be a cache block cleaning request
466 // use the CPU's statically allocated write request and packet objects
467 const RequestPtr
&req
= data_write_req
;
// Record the access parameters in the exec trace.
470 traceData
->setMem(addr
, size
, flags
);
474 req
->taskId(taskId());
476 Addr frag_addr
= addr
;
478 int size_left
= size
;
479 int curr_frag_id
= 0;
481 Fault fault
= NoFault
;
// Per-fragment work (the loop header is elided in this extract):
484 predicate
= genMemFragmentRequest(req
, frag_addr
, size
, flags
,
485 byte_enable
, frag_size
, size_left
);
487 // translate to physical address
489 fault
= thread
->dtb
->translateAtomic(req
, thread
->getTC(),
492 // Now do the access.
493 if (predicate
&& fault
== NoFault
) {
494 bool do_access
= true; // flag to suppress cache access
// NOTE(review): the guard for this branch (presumably the LL/SC
// store-conditional check) is elided in this extract.
497 assert(curr_frag_id
== 0);
499 TheISA::handleLockedWrite(thread
, req
,
500 dcachePort
.cacheBlockMask
);
501 } else if (req
->isSwap()) {
502 assert(curr_frag_id
== 0);
// Conditional swap carries its compare value in the request's extra data.
503 if (req
->isCondSwap()) {
505 req
->setExtraData(*res
);
509 if (do_access
&& !req
->getFlags().isSet(Request::NO_ACCESS
)) {
510 Packet
pkt(req
, Packet::makeWriteCmd(req
));
511 pkt
.dataStatic(data
);
513 if (req
->isLocalAccess()) {
515 req
->localAccessor(thread
->getTC(), &pkt
);
517 dcache_latency
+= sendPacket(dcachePort
, &pkt
);
519 // Notify other threads on this CPU of write
520 threadSnoop(&pkt
, curThread
);
522 dcache_access
= true;
523 assert(!pkt
.isError());
// Swap path: return the old memory value through res (guard elided).
526 assert(res
&& curr_frag_id
== 0);
527 memcpy(res
, pkt
.getConstPtr
<uint8_t>(), size
);
// Non-swap requests report their result (presumably the SC success flag —
// confirm) via the request's extra data.
531 if (res
&& !req
->isSwap()) {
532 *res
= req
->getExtraData();
536 //If there's a fault or we don't need to access a second cache line,
538 if (fault
!= NoFault
|| size_left
== 0)
540 if (req
->isLockedRMW() && fault
== NoFault
) {
541 assert(!req
->isMasked());
545 if (fault
!= NoFault
&& req
->isPrefetch()) {
553 * Set up for accessing the next cache line.
555 frag_addr
+= frag_size
;
557 //Move the pointer we're reading into to the correct location.
// Atomic read-modify-write (AMO): applies `amo_op` to `size` bytes at
// virtual address `addr`, issued as a write with a swap-style command;
// `data` receives the value returned by the operation. AMOs must fit in a
// single cache line. Returns the translation fault, or NoFault.
// NOTE(review): several lines (braces, the translateAtomic mode argument,
// the final return) are elided in this extract.
565 AtomicSimpleCPU::amoMem(Addr addr
, uint8_t* data
, unsigned size
,
566 Request::Flags flags
, AtomicOpFunctorPtr amo_op
)
568 SimpleExecContext
& t_info
= *threadInfo
[curThread
];
569 SimpleThread
* thread
= t_info
.thread
;
571 // use the CPU's statically allocated amo request and packet objects
572 const RequestPtr
&req
= data_amo_req
;
// Record the access parameters in the exec trace.
575 traceData
->setMem(addr
, size
, flags
);
577 //The address of the second part of this access if it needs to be split
578 //across a cache line boundary.
579 Addr secondAddr
= roundDown(addr
+ size
- 1, cacheLineSize());
581 // AMO requests that access across a cache line boundary are not
582 // allowed since the cache does not guarantee AMO ops to be executed
583 // atomically in two cache lines
584 // For ISAs such as x86 that requires AMO operations to work on
585 // accesses that cross cache-line boundaries, the cache needs to be
586 // modified to support locking both cache lines to guarantee the
588 if (secondAddr
> addr
) {
589 panic("AMO request should not access across a cache line boundary\n");
594 req
->taskId(taskId());
// The atomic-op functor travels inside the request (moved, not copied).
595 req
->setVirt(addr
, size
, flags
, dataRequestorId(),
596 thread
->pcState().instAddr(), std::move(amo_op
));
598 // translate to physical address
599 Fault fault
= thread
->dtb
->translateAtomic(req
, thread
->getTC(),
602 // Now do the access.
603 if (fault
== NoFault
&& !req
->getFlags().isSet(Request::NO_ACCESS
)) {
604 // We treat AMO accesses as Write accesses with SwapReq command
605 // data will hold the return data of the AMO access
606 Packet
pkt(req
, Packet::makeWriteCmd(req
));
607 pkt
.dataStatic(data
);
609 if (req
->isLocalAccess())
610 dcache_latency
+= req
->localAccessor(thread
->getTC(), &pkt
);
612 dcache_latency
+= sendPacket(dcachePort
, &pkt
);
615 dcache_access
= true;
617 assert(!pkt
.isError());
618 assert(!req
->isLLSC());
621 if (fault
!= NoFault
&& req
->isPrefetch()) {
625 //If there's a fault and we're not doing prefetch, return it
// Main execution loop. Each tick executes up to `width` instructions (or
// keeps iterating while `locked`): fetch (with itb translation and an
// atomic icache access), execute, then accumulate icache/dcache latencies
// into the stall time and reschedule the next tick.
// NOTE(review): many lines are elided in this extract (thread-switch call,
// fetch guard bodies, stat updates, braces, latency initialization) — the
// code below is fragmentary.
630 AtomicSimpleCPU::tick()
632 DPRINTF(SimpleCPU
, "Tick\n");
634 // Change thread if multi-threaded
637 // Set memroy request ids to current thread
638 if (numThreads
> 1) {
639 ContextID cid
= threadContexts
[curThread
]->contextId();
641 ifetch_req
->setContext(cid
);
642 data_read_req
->setContext(cid
);
643 data_write_req
->setContext(cid
);
644 data_amo_req
->setContext(cid
);
647 SimpleExecContext
& t_info
= *threadInfo
[curThread
];
648 SimpleThread
* thread
= t_info
.thread
;
// Execute up to `width` instructions per tick; `locked` extends the loop
// so a locked RMW sequence completes within one tick.
652 for (int i
= 0; i
< width
|| locked
; ++i
) {
654 updateCycleCounters(BaseCPU::CPU_STATE_ON
);
// Interrupts are deferred while an instruction delays its commit.
656 if (!curStaticInst
|| !curStaticInst
->isDelayedCommit()) {
657 checkForInterrupts();
661 // We must have just got suspended by a PC event
662 if (_status
== Idle
) {
667 Fault fault
= NoFault
;
669 TheISA::PCState pcState
= thread
->pcState();
// ROM-resident microcode needs no instruction fetch from memory.
671 bool needToFetch
= !isRomMicroPC(pcState
.microPC()) &&
674 ifetch_req
->taskId(taskId());
675 setupFetchRequest(ifetch_req
);
// Translate the fetch address through the instruction TLB.
676 fault
= thread
->itb
->translateAtomic(ifetch_req
, thread
->getTC(),
680 if (fault
== NoFault
) {
681 Tick icache_latency
= 0;
682 bool icache_access
= false;
683 dcache_access
= false; // assume no dcache access
686 // This is commented out because the decoder would act like
687 // a tiny cache otherwise. It wouldn't be flushed when needed
688 // like the I cache. It should be flushed, and when that works
689 // this code should be uncommented.
690 //Fetch more instruction memory if necessary
691 //if (decoder.needMoreBytes())
693 icache_access
= true;
694 Packet ifetch_pkt
= Packet(ifetch_req
, MemCmd::ReadReq
);
695 ifetch_pkt
.dataStatic(&inst
);
697 icache_latency
= sendPacket(icachePort
, &ifetch_pkt
);
699 assert(!ifetch_pkt
.isError());
701 // ifetch_req is initialized to read the instruction directly
702 // into the CPU object's inst field.
708 Tick stall_ticks
= 0;
// Execute the decoded instruction against this thread's context.
710 fault
= curStaticInst
->execute(&t_info
, traceData
);
712 // keep an instruction count
713 if (fault
== NoFault
) {
// Fire the Commit probe for listeners (e.g. SimPoint profiling).
715 ppCommit
->notify(std::make_pair(thread
, curStaticInst
));
716 } else if (traceData
) {
720 if (fault
!= NoFault
&&
721 dynamic_pointer_cast
<SyscallRetryFault
>(fault
)) {
722 // Retry execution of system calls after a delay.
723 // Prevents immediate re-execution since conditions which
724 // caused the retry are unlikely to change every tick.
725 stall_ticks
+= clockEdge(syscallRetryLatency
) - curTick();
731 // @todo remove me after debugging with legion done
732 if (curStaticInst
&& (!curStaticInst
->isMicroop() ||
733 curStaticInst
->isFirstMicroop()))
// Optionally charge the modelled cache latencies as stall time.
736 if (simulate_inst_stalls
&& icache_access
)
737 stall_ticks
+= icache_latency
;
739 if (simulate_data_stalls
&& dcache_access
)
740 stall_ticks
+= dcache_latency
;
743 // the atomic cpu does its accounting in ticks, so
744 // keep counting in ticks but round to the clock
746 latency
+= divCeil(stall_ticks
, clockPeriod()) *
751 if (fault
!= NoFault
|| !t_info
.stayAtPC
)
755 if (tryCompleteDrain())
758 // instruction takes at least one cycle
759 if (latency
< clockPeriod())
760 latency
= clockPeriod();
763 reschedule(tickEvent
, curTick() + latency
, true);
767 AtomicSimpleCPU::regProbePoints()
769 BaseCPU::regProbePoints();
771 ppCommit
= new ProbePointArg
<pair
<SimpleThread
*, const StaticInstPtr
>>
772 (getProbeManager(), "Commit");
776 AtomicSimpleCPU::printAddr(Addr a
)
778 dcachePort
.printAddr(a
);
781 ////////////////////////////////////////////////////////////////////////
783 // AtomicSimpleCPU Simulation Object
786 AtomicSimpleCPUParams::create()
788 return new AtomicSimpleCPU(this);