2 * Copyright (c) 2012 ARM Limited
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder. You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
14 * Copyright (c) 2002-2005 The Regents of The University of Michigan
15 * All rights reserved.
17 * Redistribution and use in source and binary forms, with or without
18 * modification, are permitted provided that the following conditions are
19 * met: redistributions of source code must retain the above copyright
20 * notice, this list of conditions and the following disclaimer;
21 * redistributions in binary form must reproduce the above copyright
22 * notice, this list of conditions and the following disclaimer in the
23 * documentation and/or other materials provided with the distribution;
24 * neither the name of the copyright holders nor the names of its
25 * contributors may be used to endorse or promote products derived from
26 * this software without specific prior written permission.
28 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
29 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
31 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
32 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
33 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
34 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
35 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
36 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
37 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
38 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40 * Authors: Steve Reinhardt
43 #include "arch/locked_mem.hh"
44 #include "arch/mmapped_ipr.hh"
45 #include "arch/utility.hh"
46 #include "base/bigint.hh"
47 #include "base/output.hh"
48 #include "config/the_isa.hh"
49 #include "cpu/simple/atomic.hh"
50 #include "cpu/exetrace.hh"
51 #include "debug/Drain.hh"
52 #include "debug/ExecFaulting.hh"
53 #include "debug/SimpleCPU.hh"
54 #include "mem/packet.hh"
55 #include "mem/packet_access.hh"
56 #include "mem/physical.hh"
57 #include "params/AtomicSimpleCPU.hh"
58 #include "sim/faults.hh"
59 #include "sim/system.hh"
60 #include "sim/full_system.hh"
// Pull the ISA-specific namespace into file scope (gem5 convention for
// ISA-parameterised CPU models).
63 using namespace TheISA
;
// TickEvent constructor: runs at CPU-tick priority and keeps a back
// pointer to the owning CPU so process() can call into it.
// NOTE(review): the original body/brace lines are missing from this
// extraction.
65 AtomicSimpleCPU::TickEvent::TickEvent(AtomicSimpleCPU
*c
)
66 : Event(CPU_Tick_Pri
), cpu(c
)
// Event callback fired on each scheduled tick. Body lines are not
// visible in this extraction (presumably forwards to cpu->tick() --
// TODO confirm against the full source).
72 AtomicSimpleCPU::TickEvent::process()
// Human-readable event name used by event tracing / debug output.
78 AtomicSimpleCPU::TickEvent::description() const
80 return "AtomicSimpleCPU tick";
// One-time initialisation: wire up the thread context's memory proxies,
// run ISA-level CPU init for every context (full-system mode only, and
// only when this CPU is not switched out), then pre-set context IDs on
// the statically allocated fetch/read/write requests.
84 AtomicSimpleCPU::init()
88 // Initialise the ThreadContext's memory proxies
89 tcBase()->initMemProxies(tcBase());
// ISA-level init (including PC setup) applies only in full system and
// only for a live (non-switched-out) CPU.
91 if (FullSystem
&& !params()->switched_out
) {
92 ThreadID size
= threadContexts
.size();
93 for (ThreadID i
= 0; i
< size
; ++i
) {
94 ThreadContext
*tc
= threadContexts
[i
];
95 // initialize CPU, including PC
96 TheISA::initCPU(tc
, tc
->contextId());
100 // Atomic doesn't do MT right now, so contextId == threadId
101 ifetch_req
.setThreadContext(_cpuId
, 0); // Add thread ID if we add MT
102 data_read_req
.setThreadContext(_cpuId
, 0); // Add thread ID here too
103 data_write_req
.setThreadContext(_cpuId
, 0); // Add thread ID here too
// Constructor: forwards params to BaseSimpleCPU and initialises the
// tick event, per-tick execution width, LLSC lock flag, stall-modelling
// switches, the two cache ports, and SimPoint profiling state.
106 AtomicSimpleCPU::AtomicSimpleCPU(AtomicSimpleCPUParams
*p
)
107 : BaseSimpleCPU(p
), tickEvent(this), width(p
->width
), locked(false),
108 simulate_data_stalls(p
->simulate_data_stalls
),
109 simulate_inst_stalls(p
->simulate_inst_stalls
),
111 icachePort(name() + ".icache_port", this),
112 dcachePort(name() + ".dcache_port", this),
114 simpoint(p
->simpoint_profile
),
115 intervalSize(p
->simpoint_interval
),
118 simpointStream(NULL
),
120 currentBBVInstCount(0)
// Open the SimPoint BBV output stream. NOTE(review): the guard that
// presumably makes this conditional on simpoint profiling being enabled
// is missing from this extraction -- confirm against the full source.
125 simpointStream
= simout
.create(p
->simpoint_profile_file
, false);
// Destructor: cancel any pending tick event and close the SimPoint
// output stream if one was opened.
130 AtomicSimpleCPU::~AtomicSimpleCPU()
132 if (tickEvent
.scheduled()) {
133 deschedule(tickEvent
);
135 if (simpointStream
) {
136 simout
.close(simpointStream
);
// Begin draining the CPU: there must be no drain already in progress;
// deschedules the tick event so no further instructions execute.
// NOTE(review): the code that records the DrainManager and the return
// value lines are missing from this extraction.
141 AtomicSimpleCPU::drain(DrainManager
*dm
)
143 assert(!drain_manager
);
148 DPRINTF(Drain
, "Requesting drain: %s\n", pcState());
// Stop ticking while draining.
152 if (tickEvent
.scheduled())
153 deschedule(tickEvent
);
155 DPRINTF(Drain
, "Not executing microcode, no need to drain.\n");
// Resume after a drain: reschedule the tick event if the (single)
// thread is active, otherwise go idle. The atomic CPU supports exactly
// one thread context.
161 AtomicSimpleCPU::drainResume()
163 assert(!tickEvent
.scheduled());
164 assert(!drain_manager
);
168 DPRINTF(SimpleCPU
, "Resume\n");
171 assert(!threadContexts
.empty());
172 if (threadContexts
.size() > 1)
173 fatal("The atomic CPU only supports one thread.\n");
175 if (thread
->status() == ThreadContext::Active
) {
176 schedule(tickEvent
, nextCycle());
177 _status
= BaseSimpleCPU::Running
;
180 _status
= BaseSimpleCPU::Idle
;
// Reset the system-wide committed-instruction counter on resume.
184 system
->totalNumInsts
= 0;
// If a drain is pending and the CPU has reached a drainable state,
// signal the DrainManager that draining is done and clear the pending
// pointer. NOTE(review): the guard conditions are missing from this
// extraction -- confirm against the full source.
188 AtomicSimpleCPU::tryCompleteDrain()
193 DPRINTF(Drain
, "tryCompleteDrain: %s\n", pcState());
197 DPRINTF(Drain
, "CPU done draining, processing drain event\n");
198 drain_manager
->signalDrainDone();
199 drain_manager
= NULL
;
// Switch this CPU out (e.g. for CPU model switching): delegate to the
// base class, then verify the tick event is gone and the CPU was in a
// sane state (Running or Idle).
206 AtomicSimpleCPU::switchOut()
208 BaseSimpleCPU::switchOut();
210 assert(!tickEvent
.scheduled());
211 assert(_status
== BaseSimpleCPU::Running
|| _status
== Idle
);
// Take over execution state from another CPU model: delegate to the
// base class, then re-stamp the context IDs on the statically allocated
// requests (mirrors init()).
217 AtomicSimpleCPU::takeOverFrom(BaseCPU
*oldCPU
)
219 BaseSimpleCPU::takeOverFrom(oldCPU
);
221 // The tick event should have been descheduled by drain()
222 assert(!tickEvent
.scheduled());
224 ifetch_req
.setThreadContext(_cpuId
, 0); // Add thread ID if we add MT
225 data_read_req
.setThreadContext(_cpuId
, 0); // Add thread ID here too
226 data_write_req
.setThreadContext(_cpuId
, 0); // Add thread ID here too
// Sanity check: the atomic CPU can only run against a memory system in
// atomic mode; abort the simulation otherwise. NOTE(review): the tail
// of the fatal() message is missing from this extraction.
230 AtomicSimpleCPU::verifyMemoryMode() const
232 if (!system
->isAtomicMode()) {
233 fatal("The atomic CPU requires the memory system to be in "
// Activate the (single) thread context after `delay` cycles: account
// the cycles spent suspended, schedule the tick event on a clock edge,
// and mark the CPU Running.
239 AtomicSimpleCPU::activateContext(ThreadID thread_num
, Cycles delay
)
241 DPRINTF(SimpleCPU
, "ActivateContext %d (%d cycles)\n", thread_num
, delay
);
// Only one thread is supported; must currently be idle with no tick
// event pending.
243 assert(thread_num
== 0);
246 assert(_status
== Idle
);
247 assert(!tickEvent
.scheduled());
// Credit the cycles that elapsed while the thread was suspended.
250 numCycles
+= ticksToCycles(thread
->lastActivate
- thread
->lastSuspend
);
252 //Make sure ticks are still on multiples of cycles
253 schedule(tickEvent
, clockEdge(delay
));
254 _status
= BaseSimpleCPU::Running
;
// Suspend the (single) thread context: deschedule the tick event if it
// is pending. The CPU must currently be Running.
259 AtomicSimpleCPU::suspendContext(ThreadID thread_num
)
261 DPRINTF(SimpleCPU
, "SuspendContext %d\n", thread_num
);
263 assert(thread_num
== 0);
269 assert(_status
== BaseSimpleCPU::Running
);
271 // tick event may not be scheduled if this gets called from inside
272 // an instruction's execution, e.g. "quiesce"
273 if (tickEvent
.scheduled())
274 deschedule(tickEvent
);
// Atomic-mode data read of `size` bytes at virtual address `addr` into
// `data`. Handles accesses that straddle a cache line by splitting into
// two sub-accesses (first part here, second part set up at the bottom).
// Each part is: set up the request, translate via the DTB, then access
// memory-mapped IPR / physical memory / the dcache port as appropriate.
// Returns the translation fault, if any. NOTE(review): several original
// lines (braces, loop structure, fullSize setup, returns) are missing
// from this extraction.
282 AtomicSimpleCPU::readMem(Addr addr
, uint8_t * data
,
283 unsigned size
, unsigned flags
)
285 // use the CPU's statically allocated read request and packet objects
286 Request
*req
= &data_read_req
;
289 traceData
->setAddr(addr
);
292 //The size of the data we're trying to read.
295 //The address of the second part of this access if it needs to be split
296 //across a cache line boundary.
297 Addr secondAddr
= roundDown(addr
+ size
- 1, cacheLineSize());
// If the access crosses a line boundary, shrink the first part so it
// ends exactly at the boundary.
299 if (secondAddr
> addr
)
300 size
= secondAddr
- addr
;
305 req
->setVirt(0, addr
, size
, flags
, dataMasterId(), thread
->pcState().instAddr());
307 // translate to physical address
308 Fault fault
= thread
->dtb
->translateAtomic(req
, tc
, BaseTLB::Read
);
310 // Now do the access.
311 if (fault
== NoFault
&& !req
->getFlags().isSet(Request::NO_ACCESS
)) {
312 Packet pkt
= Packet(req
,
313 req
->isLLSC() ? MemCmd::LoadLockedReq
:
315 pkt
.dataStatic(data
);
// Memory-mapped IPR reads are handled by the ISA; otherwise go to
// physical memory directly (fastmem) or through the dcache port.
317 if (req
->isMmappedIpr())
318 dcache_latency
+= TheISA::handleIprRead(thread
->getTC(), &pkt
);
320 if (fastmem
&& system
->isMemAddr(pkt
.getAddr()))
321 system
->getPhysMem().access(&pkt
);
323 dcache_latency
+= dcachePort
.sendAtomic(&pkt
);
325 dcache_access
= true;
327 assert(!pkt
.isError());
// Track LLSC (load-locked) state for this thread.
330 TheISA::handleLockedRead(thread
, req
);
334 //If there's a fault, return it
335 if (fault
!= NoFault
) {
336 if (req
->isPrefetch()) {
343 //If we don't need to access a second cache line, stop now.
344 if (secondAddr
<= addr
)
346 if (req
->isLocked() && fault
== NoFault
) {
354 * Set up for accessing the second cache line.
357 //Move the pointer we're reading into to the correct location.
359 //Adjust the size to get the remaining bytes.
360 size
= addr
+ fullSize
- secondAddr
;
361 //And access the right address.
// Atomic-mode data write of `size` bytes from `data` to virtual address
// `addr`; `res` (if non-null) receives store-conditional / swap result
// data. Mirrors readMem(): split line-straddling accesses in two, and
// for each part translate via the DTB then access IPR / physical memory
// / the dcache port. Store-conditionals and swaps select the packet
// command and may suppress the actual access (failed SC). Returns the
// translation fault, if any. NOTE(review): several original lines
// (braces, loop structure, fullSize setup, returns) are missing from
// this extraction.
368 AtomicSimpleCPU::writeMem(uint8_t *data
, unsigned size
,
369 Addr addr
, unsigned flags
, uint64_t *res
)
371 // use the CPU's statically allocated write request and packet objects
372 Request
*req
= &data_write_req
;
375 traceData
->setAddr(addr
);
378 //The size of the data we're trying to read.
381 //The address of the second part of this access if it needs to be split
382 //across a cache line boundary.
383 Addr secondAddr
= roundDown(addr
+ size
- 1, cacheLineSize());
// Shrink the first part to end at the cache line boundary if split.
385 if(secondAddr
> addr
)
386 size
= secondAddr
- addr
;
391 req
->setVirt(0, addr
, size
, flags
, dataMasterId(), thread
->pcState().instAddr());
393 // translate to physical address
394 Fault fault
= thread
->dtb
->translateAtomic(req
, tc
, BaseTLB::Write
);
396 // Now do the access.
397 if (fault
== NoFault
) {
398 MemCmd cmd
= MemCmd::WriteReq
; // default
399 bool do_access
= true; // flag to suppress cache access
// Store-conditional: ISA decides whether the SC may proceed.
402 cmd
= MemCmd::StoreCondReq
;
403 do_access
= TheISA::handleLockedWrite(thread
, req
);
404 } else if (req
->isSwap()) {
405 cmd
= MemCmd::SwapReq
;
// Conditional swap carries the compare value as extra data.
406 if (req
->isCondSwap()) {
408 req
->setExtraData(*res
);
412 if (do_access
&& !req
->getFlags().isSet(Request::NO_ACCESS
)) {
413 Packet pkt
= Packet(req
, cmd
);
414 pkt
.dataStatic(data
);
416 if (req
->isMmappedIpr()) {
418 TheISA::handleIprWrite(thread
->getTC(), &pkt
);
420 if (fastmem
&& system
->isMemAddr(pkt
.getAddr()))
421 system
->getPhysMem().access(&pkt
);
423 dcache_latency
+= dcachePort
.sendAtomic(&pkt
);
425 dcache_access
= true;
426 assert(!pkt
.isError());
// Swap: the old memory value comes back in the packet; copy it out.
430 memcpy(res
, pkt
.getPtr
<uint8_t>(), fullSize
);
// Store-conditional result (success/failure) is reported via extra
// data on the request.
434 if (res
&& !req
->isSwap()) {
435 *res
= req
->getExtraData();
439 //If there's a fault or we don't need to access a second cache line,
441 if (fault
!= NoFault
|| secondAddr
<= addr
)
443 if (req
->isLocked() && fault
== NoFault
) {
447 if (fault
!= NoFault
&& req
->isPrefetch()) {
455 * Set up for accessing the second cache line.
458 //Move the pointer we're reading into to the correct location.
460 //Adjust the size to get the remaining bytes.
461 size
= addr
+ fullSize
- secondAddr
;
462 //And access the right address.
// Main execution loop, invoked by the tick event. Executes up to
// `width` instructions per tick (or keeps going while `locked`): check
// interrupts, fetch (via ITB translate + icache/physical access),
// execute, optionally profile for SimPoints, accumulate modelled
// icache/dcache stall latency, and reschedule the next tick. NOTE:
// many original lines (braces, fetch/commit details, early returns)
// are missing from this extraction.
469 AtomicSimpleCPU::tick()
471 DPRINTF(SimpleCPU
, "Tick\n");
475 for (int i
= 0; i
< width
|| locked
; ++i
) {
// Don't take interrupts in the middle of a delayed-commit (micro-op)
// sequence.
478 if (!curStaticInst
|| !curStaticInst
->isDelayedCommit())
479 checkForInterrupts();
482 // We must have just got suspended by a PC event
483 if (_status
== Idle
) {
488 Fault fault
= NoFault
;
490 TheISA::PCState pcState
= thread
->pcState();
// Instructions in the microcode ROM don't need a memory fetch.
492 bool needToFetch
= !isRomMicroPC(pcState
.microPC()) &&
495 setupFetchRequest(&ifetch_req
);
496 fault
= thread
->itb
->translateAtomic(&ifetch_req
, tc
,
500 if (fault
== NoFault
) {
501 Tick icache_latency
= 0;
502 bool icache_access
= false;
503 dcache_access
= false; // assume no dcache access
506 // This is commented out because the decoder would act like
507 // a tiny cache otherwise. It wouldn't be flushed when needed
508 // like the I cache. It should be flushed, and when that works
509 // this code should be uncommented.
510 //Fetch more instruction memory if necessary
511 //if(decoder.needMoreBytes())
513 icache_access
= true;
514 Packet ifetch_pkt
= Packet(&ifetch_req
, MemCmd::ReadReq
);
515 ifetch_pkt
.dataStatic(&inst
);
// Fastmem bypasses the icache port and touches physical memory
// directly; otherwise send atomically through the icache port.
517 if (fastmem
&& system
->isMemAddr(ifetch_pkt
.getAddr()))
518 system
->getPhysMem().access(&ifetch_pkt
);
520 icache_latency
= icachePort
.sendAtomic(&ifetch_pkt
);
522 assert(!ifetch_pkt
.isError());
524 // ifetch_req is initialized to read the instruction directly
525 // into the CPU object's inst field.
// Execute the decoded instruction.
532 fault
= curStaticInst
->execute(this, traceData
);
534 // keep an instruction count
535 if (fault
== NoFault
)
537 else if (traceData
&& !DTRACE(ExecFaulting
)) {
545 // @todo remove me after debugging with legion done
546 if (curStaticInst
&& (!curStaticInst
->isMicroop() ||
547 curStaticInst
->isFirstMicroop()))
550 // profile for SimPoints if enabled and macro inst is finished
551 if (simpoint
&& curStaticInst
&& (fault
== NoFault
) &&
552 (!curStaticInst
->isMicroop() ||
553 curStaticInst
->isLastMicroop())) {
// Accumulate simulated stall time from the cache accesses made
// this iteration, if stall modelling is enabled.
557 Tick stall_ticks
= 0;
558 if (simulate_inst_stalls
&& icache_access
)
559 stall_ticks
+= icache_latency
;
561 if (simulate_data_stalls
&& dcache_access
)
562 stall_ticks
+= dcache_latency
;
565 // the atomic cpu does its accounting in ticks, so
566 // keep counting in ticks but round to the clock
568 latency
+= divCeil(stall_ticks
, clockPeriod()) *
// Advance past the instruction unless we faulted or must stay at
// the same PC (e.g. mid-macro-op).
573 if(fault
!= NoFault
|| !stayAtPC
)
577 if (tryCompleteDrain())
580 // instruction takes at least one cycle
581 if (latency
< clockPeriod())
582 latency
= clockPeriod();
585 schedule(tickEvent
, curTick() + latency
);
// Debug helper: forward an address to the dcache port's address
// printer.
590 AtomicSimpleCPU::printAddr(Addr a
)
592 dcachePort
.printAddr(a
);
// SimPoint basic-block-vector (BBV) profiling, called per committed
// macro instruction. Tracks the current basic block (start/end PC and
// instruction count); at each control instruction the block is looked
// up in bbMap (inserting a new entry with a fresh id on first sight,
// otherwise bumping its count). When the interval instruction budget is
// reached, the non-zero per-block counts are sorted by id and emitted
// as one "T:id:count ..." line to the simpoint stream, and the overshoot
// is carried into the next interval as drift. NOTE(review): some lines
// (braces, intervalCount updates, count resets) are missing from this
// extraction.
596 AtomicSimpleCPU::profileSimPoint()
// Starting a new basic block: record its first instruction's PC.
598 if (!currentBBVInstCount
)
599 currentBBV
.first
= thread
->pcState().instAddr();
602 ++currentBBVInstCount
;
604 // If inst is control inst, assume end of basic block.
605 if (curStaticInst
->isControl()) {
606 currentBBV
.second
= thread
->pcState().instAddr();
608 auto map_itr
= bbMap
.find(currentBBV
);
609 if (map_itr
== bbMap
.end()){
610 // If a new (previously unseen) basic block is found,
611 // add a new unique id, record num of insts and insert into bbMap.
613 info
.id
= bbMap
.size() + 1;
614 info
.insts
= currentBBVInstCount
;
615 info
.count
= currentBBVInstCount
;
616 bbMap
.insert(std::make_pair(currentBBV
, info
));
618 // If basic block is seen before, just increment the count by the
619 // number of insts in basic block.
620 BBInfo
& info
= map_itr
->second
;
621 assert(info
.insts
== currentBBVInstCount
);
622 info
.count
+= currentBBVInstCount
;
// Reset for the next basic block.
624 currentBBVInstCount
= 0;
626 // Reached end of interval if the sum of the current inst count
627 // (intervalCount) and the excessive inst count from the previous
628 // interval (intervalDrift) is greater than/equal to the interval size.
629 if (intervalCount
+ intervalDrift
>= intervalSize
) {
630 // summarize interval and display BBV info
631 std::vector
<pair
<uint64_t, uint64_t> > counts
;
// Collect (id, count) pairs for blocks touched this interval.
632 for (auto map_itr
= bbMap
.begin(); map_itr
!= bbMap
.end();
634 BBInfo
& info
= map_itr
->second
;
635 if (info
.count
!= 0) {
636 counts
.push_back(std::make_pair(info
.id
, info
.count
));
// Sort by block id so output order is deterministic.
640 std::sort(counts
.begin(), counts
.end());
642 // Print output BBV info
643 *simpointStream
<< "T";
644 for (auto cnt_itr
= counts
.begin(); cnt_itr
!= counts
.end();
646 *simpointStream
<< ":" << cnt_itr
->first
647 << ":" << cnt_itr
->second
<< " ";
649 *simpointStream
<< "\n";
// Carry the interval overshoot into the next interval.
651 intervalDrift
= (intervalCount
+ intervalDrift
) - intervalSize
;
657 ////////////////////////////////////////////////////////////////////////
659 // AtomicSimpleCPU Simulation Object
// Param-object factory: builds the AtomicSimpleCPU SimObject. In
// syscall-emulation mode exactly one workload process is required.
662 AtomicSimpleCPUParams::create()
665 if (!FullSystem
&& workload
.size() != 1)
666 panic("only one workload allowed");
667 return new AtomicSimpleCPU(this);