2 * Copyright (c) 2002-2005 The Regents of The University of Michigan
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 * Authors: Steve Reinhardt
31 #include "arch/locked_mem.hh"
32 #include "arch/mmaped_ipr.hh"
33 #include "arch/utility.hh"
34 #include "base/bigint.hh"
35 #include "cpu/exetrace.hh"
36 #include "cpu/simple/atomic.hh"
37 #include "mem/packet.hh"
38 #include "mem/packet_access.hh"
39 #include "params/AtomicSimpleCPU.hh"
40 #include "sim/system.hh"
// Bring the ISA-specific namespace into scope for the unqualified
// ISA helpers used below (initCPU, handleLockedRead/Write, ...).
43 using namespace TheISA
;
// TickEvent constructor: registers this event on the main event queue at
// CPU-tick priority and remembers the owning CPU so process() can call back.
45 AtomicSimpleCPU::TickEvent::TickEvent(AtomicSimpleCPU
*c
)
46 : Event(&mainEventQueue
, CPU_Tick_Pri
), cpu(c
)
// Event callback fired each scheduled tick.
// NOTE(review): the body (original lines 53-56) is missing from this
// extraction; presumably it invokes cpu->tick() — confirm against the repo.
52 AtomicSimpleCPU::TickEvent::process()
// Human-readable event name used by event-queue tracing/debug output.
58 AtomicSimpleCPU::TickEvent::description() const
60 return "AtomicSimpleCPU tick";
// Look up one of this CPU's ports by name ("dcache_port", "icache_port",
// or "physmem_port"); any other name is a fatal configuration error.
// Requesting "physmem_port" also latches hasPhysMemPort so later accesses
// can bypass the cache ports for physical-memory addresses.
// NOTE(review): the return statements (original lines 67/69/72) are missing
// from this extraction.
64 AtomicSimpleCPU::getPort(const std::string
&if_name
, int idx
)
66 if (if_name
== "dcache_port")
68 else if (if_name
== "icache_port")
70 else if (if_name
== "physmem_port") {
71 hasPhysMemPort
= true;
75 panic("No Such Port\n");
// One-time initialization after construction/port binding:
//  - caches the CPU id from the thread context,
//  - runs ISA-level CPU init (including PC) for every thread context,
//  - if a physmem port is connected, records its single address range so
//    accesses to it can be short-circuited,
//  - stamps the statically allocated fetch/read/write requests with the
//    CPU id (thread id hard-wired to 0 until MT support exists).
// NOTE(review): several lines are missing from this extraction (original
// 80-83, 86, 89-92, 96) — e.g. the declaration of `snoop` used at line 94
// and any guards around the physmem block; verify against the repo.
79 AtomicSimpleCPU::init()
82 cpuId
= tc
->readCpuId();
84 for (int i
= 0; i
< threadContexts
.size(); ++i
) {
85 ThreadContext
*tc
= threadContexts
[i
];
87 // initialize CPU, including PC
88 TheISA::initCPU(tc
, cpuId
);
93 AddrRangeList pmAddrList
;
94 physmemPort
.getPeerAddressRanges(pmAddrList
, snoop
);
95 physMemAddr
= *pmAddrList
.begin();
97 ifetch_req
.setThreadContext(cpuId
, 0); // Add thread ID if we add MT
98 data_read_req
.setThreadContext(cpuId
, 0); // Add thread ID here too
99 data_write_req
.setThreadContext(cpuId
, 0); // Add thread ID here too
// Timing-mode requests are impossible on an atomic CPU; any arrival is a
// configuration bug, so die loudly.
103 AtomicSimpleCPU::CpuPort::recvTiming(PacketPtr pkt
)
105 panic("AtomicSimpleCPU doesn't expect recvTiming callback!");
// Atomic snoop hook: this CPU has no caches to snoop, so the request is
// answered immediately.
// NOTE(review): the return statement (original line 113) is missing from
// this extraction.
110 AtomicSimpleCPU::CpuPort::recvAtomic(PacketPtr pkt
)
112 //Snooping a coherence request, just return
// Functional snoop hook: the CPU holds no cached state, so there is
// nothing to update.
117 AtomicSimpleCPU::CpuPort::recvFunctional(PacketPtr pkt
)
119 //No internal storage to update, just return
// Status-change hook. Only RangeChange is tolerated: the first one is
// echoed back (guarded by snoopRangeSent so peers are notified exactly
// once); any other status is unexpected and fatal.
124 AtomicSimpleCPU::CpuPort::recvStatusChange(Status status
)
126 if (status
== RangeChange
) {
127 if (!snoopRangeSent
) {
128 snoopRangeSent
= true;
129 sendStatusChange(Port::RangeChange
);
134 panic("AtomicSimpleCPU doesn't expect recvStatusChange callback!");
// Retries only occur in timing mode, which this CPU never uses — fatal.
138 AtomicSimpleCPU::CpuPort::recvRetry()
140 panic("AtomicSimpleCPU doesn't expect recvRetry callback!");
// When the dcache port is (re)bound, propagate the change to the thread
// context so its functional/virtual memory ports are rebuilt.
// NOTE(review): intermediate lines (original 145-148, 150) are missing from
// this extraction — likely the Port::setPeer() forwarding call and a guard.
144 AtomicSimpleCPU::DcachePort::setPeer(Port
*port
)
149 // Update the ThreadContext's memory ports (Functional/Virtual
151 cpu
->tcBase()->connectMemPorts();
// Constructor: copies configuration (issue width, stall-simulation flags)
// from the params object, builds the three ports, and clears the
// snoop-range-sent latches so the first RangeChange gets forwarded.
// NOTE(review): icachePort, dcachePort AND physmemPort are all named with
// the "-iport" suffix — almost certainly a copy-paste slip (expected
// distinct suffixes such as "-dport"/"-pmport"); confirm before fixing,
// since port names may be referenced by configuration scripts.
155 AtomicSimpleCPU::AtomicSimpleCPU(Params
*p
)
156 : BaseSimpleCPU(p
), tickEvent(this), width(p
->width
),
157 simulate_data_stalls(p
->simulate_data_stalls
),
158 simulate_inst_stalls(p
->simulate_inst_stalls
),
159 icachePort(name() + "-iport", this), dcachePort(name() + "-iport", this),
160 physmemPort(name() + "-iport", this), hasPhysMemPort(false)
164 icachePort
.snoopRangeSent
= false;
165 dcachePort
.snoopRangeSent
= false;
// Destructor. Body (original lines 171-172) is not visible in this
// extraction; nothing here shows any owned resources needing release.
170 AtomicSimpleCPU::~AtomicSimpleCPU()
// Checkpoint this CPU: the SimObject drain state, the CPU status enum,
// the BaseSimpleCPU state, and finally the tick event under its own
// "<name>.tickEvent" section (via nameOut) so unserialize() can find it.
175 AtomicSimpleCPU::serialize(ostream
&os
)
177 SimObject::State so_state
= SimObject::getState();
178 SERIALIZE_ENUM(so_state
);
179 Status _status
= status();
180 SERIALIZE_ENUM(_status
);
181 BaseSimpleCPU::serialize(os
);
182 nameOut(os
, csprintf("%s.tickEvent", name()));
183 tickEvent
.serialize(os
);
// Restore this CPU from a checkpoint, mirroring serialize(): read back the
// SimObject drain state (into a scratch local — only consumed by the enum
// macro), the CPU status, the BaseSimpleCPU state, and the tick event from
// its "<section>.tickEvent" subsection.
// Fix: the second parameter had been mojibake'd to "§ion" by an HTML-entity
// mangling of "&sect" — restored to "&section" to match its use below
// (original line 192).
187 AtomicSimpleCPU::unserialize(Checkpoint
*cp
, const string
&section
)
189 SimObject::State so_state
;
190 UNSERIALIZE_ENUM(so_state
);
191 UNSERIALIZE_ENUM(_status
);
192 BaseSimpleCPU::unserialize(cp
, section
);
193 tickEvent
.unserialize(cp
, csprintf("%s.tickEvent", section
));
// Resume after a drain/checkpoint: only meaningful when Idle or SwitchedOut.
// Asserts the system is in atomic memory mode (this model cannot run in
// timing mode), moves to Running, and reschedules the tick event for an
// active thread if it is not already scheduled.
// NOTE(review): lines between the guard and the DPRINTF (original 200-201)
// and the closing braces are missing from this extraction.
197 AtomicSimpleCPU::resume()
199 if (_status
== Idle
|| _status
== SwitchedOut
)
202 DPRINTF(SimpleCPU
, "Resume\n");
203 assert(system
->getMemoryMode() == Enums::atomic
);
205 changeState(SimObject::Running
);
206 if (thread
->status() == ThreadContext::Active
) {
207 if (!tickEvent
.scheduled()) {
208 tickEvent
.schedule(nextCycle());
// Take this CPU out of service (e.g. before handing state to another CPU
// model): legal only from Running/Idle, then mark it SwitchedOut.
214 AtomicSimpleCPU::switchOut()
216 assert(status() == Running
|| status() == Idle
);
217 _status
= SwitchedOut
;
// Adopt architectural state from another CPU model: base class moves the
// thread contexts over onto our cache ports, then any active context makes
// this CPU Running and schedules its tick event (the tick event must not
// already be scheduled). Afterwards the per-request CPU id stamps are
// refreshed — note the single-threaded assumption is asserted explicitly.
// NOTE(review): several lines are missing from this extraction (original
// 235, 237-239, 241-242, 248-249) — including the `_status = Running`
// assignment implied by the surrounding logic and the declaration of the
// `tc` used at line 244; verify against the repo.
224 AtomicSimpleCPU::takeOverFrom(BaseCPU
*oldCPU
)
226 BaseCPU::takeOverFrom(oldCPU
, &icachePort
, &dcachePort
);
228 assert(!tickEvent
.scheduled());
230 // if any of this CPU's ThreadContexts are active, mark the CPU as
231 // running and schedule its tick event.
232 for (int i
= 0; i
< threadContexts
.size(); ++i
) {
233 ThreadContext
*tc
= threadContexts
[i
];
234 if (tc
->status() == ThreadContext::Active
&& _status
!= Running
) {
236 tickEvent
.schedule(nextCycle());
240 if (_status
!= Running
) {
243 assert(threadContexts
.size() == 1);
244 cpuId
= tc
->readCpuId();
245 ifetch_req
.setThreadContext(cpuId
, 0); // Add thread ID if we add MT
246 data_read_req
.setThreadContext(cpuId
, 0); // Add thread ID here too
247 data_write_req
.setThreadContext(cpuId
, 0); // Add thread ID here too
// Wake thread 0 after `delay` cycles: credits the idle period
// (lastActivate - lastSuspend) to the cycle counter, then schedules the
// tick event on a cycle boundary (nextCycle aligns curTick + delay).
// Only valid from Idle with no tick event pending; thread_num must be 0
// (single-threaded model).
252 AtomicSimpleCPU::activateContext(int thread_num
, int delay
)
254 DPRINTF(SimpleCPU
, "ActivateContext %d (%d cycles)\n", thread_num
, delay
);
256 assert(thread_num
== 0);
259 assert(_status
== Idle
);
260 assert(!tickEvent
.scheduled());
263 numCycles
+= tickToCycles(thread
->lastActivate
- thread
->lastSuspend
);
265 //Make sure ticks are still on multiples of cycles
266 tickEvent
.schedule(nextCycle(curTick
+ ticks(delay
)));
// Put thread 0 to sleep: deschedule the pending tick event if there is one
// (there may not be — e.g. when called from inside an instruction such as
// "quiesce"). Only valid while Running; thread_num must be 0.
// NOTE(review): the trailing status update (original lines 285+) is missing
// from this extraction.
272 AtomicSimpleCPU::suspendContext(int thread_num
)
274 DPRINTF(SimpleCPU
, "SuspendContext %d\n", thread_num
);
276 assert(thread_num
== 0);
279 assert(_status
== Running
);
281 // tick event may not be scheduled if this gets called from inside
282 // an instruction's execution, e.g. "quiesce"
283 if (tickEvent
.scheduled())
284 tickEvent
.deschedule();
// Templated atomic load of a T from virtual address `addr` into `data`.
// Handles accesses that straddle a cache-line boundary by splitting into
// two sub-accesses (secondAddr is the start of the second line; dataSize is
// clipped to the first line on the first pass). Each sub-access: build the
// request, translate, then dispatch to the IPR handler (mmaped IPR), the
// physmem port fast path (address in physMemAddr range), or the dcache
// port, accumulating latency in dcache_latency. LL semantics are recorded
// via handleLockedRead. Returns the translation fault, if any.
// NOTE(review): the loop construct, early returns, and second-pass pointer/
// address adjustments (e.g. original lines 354, 357-359, 361-370) are
// missing from this extraction — the trailing comments reference statements
// that are not visible here.
293 AtomicSimpleCPU::read(Addr addr
, T
&data
, unsigned flags
)
295 // use the CPU's statically allocated read request and packet objects
296 Request
*req
= &data_read_req
;
299 traceData
->setAddr(addr
);
302 //The block size of our peer.
303 int blockSize
= dcachePort
.peerBlockSize();
304 //The size of the data we're trying to read.
305 int dataSize
= sizeof(T
);
307 uint8_t * dataPtr
= (uint8_t *)&data
;
309 //The address of the second part of this access if it needs to be split
310 //across a cache line boundary.
311 Addr secondAddr
= roundDown(addr
+ dataSize
- 1, blockSize
);
313 if(secondAddr
> addr
)
314 dataSize
= secondAddr
- addr
;
319 req
->setVirt(0, addr
, dataSize
, flags
, thread
->readPC());
321 // translate to physical address
322 Fault fault
= thread
->translateDataReadReq(req
);
324 // Now do the access.
325 if (fault
== NoFault
) {
326 Packet pkt
= Packet(req
,
327 req
->isLocked() ? MemCmd::LoadLockedReq
: MemCmd::ReadReq
,
329 pkt
.dataStatic(dataPtr
);
331 if (req
->isMmapedIpr())
332 dcache_latency
+= TheISA::handleIprRead(thread
->getTC(), &pkt
);
334 if (hasPhysMemPort
&& pkt
.getAddr() == physMemAddr
)
335 dcache_latency
+= physmemPort
.sendAtomic(&pkt
);
337 dcache_latency
+= dcachePort
.sendAtomic(&pkt
);
339 dcache_access
= true;
341 assert(!pkt
.isError());
343 if (req
->isLocked()) {
344 TheISA::handleLockedRead(thread
, req
);
348 // This will need a new way to tell if it has a dcache attached.
349 if (req
->isUncacheable())
350 recordEvent("Uncached Read");
352 //If there's a fault, return it
353 if (fault
!= NoFault
)
355 //If we don't need to access a second cache line, stop now.
356 if (secondAddr
<= addr
)
360 traceData
->setData(data
);
366 * Set up for accessing the second cache line.
369 //Move the pointer we're reading into to the correct location.
371 //Adjust the size to get the remaining bytes.
372 dataSize
= addr
+ sizeof(T
) - secondAddr
;
373 //And access the right address.
// Translate a virtual data-read address to physical without performing the
// access. Mirrors read<T>()'s line-splitting structure so a straddling
// access is translated in two passes, but only the FIRST pass's physical
// address is written to `paddr` (guarded by firstTimeThrough). Returns the
// translation fault, if any.
// NOTE(review): the declaration/initialization of `dataSize` (original
// lines 392-393) and the loop/return scaffolding are missing from this
// extraction; as visible, `dataSize` at line 398 would be uninitialized —
// verify against the repo before changing anything.
379 AtomicSimpleCPU::translateDataReadAddr(Addr vaddr
, Addr
& paddr
,
380 int size
, unsigned flags
)
382 // use the CPU's statically allocated read request and packet objects
383 Request
*req
= &data_read_req
;
386 traceData
->setAddr(vaddr
);
389 //The block size of our peer.
390 int blockSize
= dcachePort
.peerBlockSize();
391 //The size of the data we're trying to read.
394 bool firstTimeThrough
= true;
396 //The address of the second part of this access if it needs to be split
397 //across a cache line boundary.
398 Addr secondAddr
= roundDown(vaddr
+ dataSize
- 1, blockSize
);
400 if(secondAddr
> vaddr
)
401 dataSize
= secondAddr
- vaddr
;
404 req
->setVirt(0, vaddr
, dataSize
, flags
, thread
->readPC());
406 // translate to physical address
407 Fault fault
= thread
->translateDataReadReq(req
);
409 //If there's a fault, return it
410 if (fault
!= NoFault
)
413 if (firstTimeThrough
) {
414 paddr
= req
->getPaddr();
415 firstTimeThrough
= false;
418 //If we don't need to access a second cache line, stop now.
419 if (secondAddr
<= vaddr
)
423 * Set up for accessing the second cache line.
426 //Adjust the size to get the remaining bytes.
427 dataSize
= vaddr
+ size
- secondAddr
;
428 //And access the right address.
// Explicit instantiations of read<T>() for every access width the ISAs
// use, hidden from Doxygen. (The `template Fault` prefixes on the
// originals are among the lines dropped by this extraction.)
433 #ifndef DOXYGEN_SHOULD_SKIP_THIS
437 AtomicSimpleCPU::read(Addr addr
, Twin32_t
&data
, unsigned flags
);
441 AtomicSimpleCPU::read(Addr addr
, Twin64_t
&data
, unsigned flags
);
445 AtomicSimpleCPU::read(Addr addr
, uint64_t &data
, unsigned flags
);
449 AtomicSimpleCPU::read(Addr addr
, uint32_t &data
, unsigned flags
);
453 AtomicSimpleCPU::read(Addr addr
, uint16_t &data
, unsigned flags
);
457 AtomicSimpleCPU::read(Addr addr
, uint8_t &data
, unsigned flags
);
459 #endif //DOXYGEN_SHOULD_SKIP_THIS
// double overload: reinterpret the 64-bit storage as uint64_t and delegate.
// NOTE(review): pointer-cast type punning is technically UB in ISO C++;
// the whole file relies on it, so leave consistent unless changed globally.
463 AtomicSimpleCPU::read(Addr addr
, double &data
, unsigned flags
)
465 return read(addr
, *(uint64_t*)&data
, flags
);
// float overload: reinterpret the 32-bit storage as uint32_t and delegate.
470 AtomicSimpleCPU::read(Addr addr
, float &data
, unsigned flags
)
472 return read(addr
, *(uint32_t*)&data
, flags
);
// int32_t overload: same bits as uint32_t, so delegate via reference cast.
478 AtomicSimpleCPU::read(Addr addr
, int32_t &data
, unsigned flags
)
480 return read(addr
, (uint32_t&)data
, flags
);
// Templated atomic store of T `data` to virtual address `addr`, with the
// same line-splitting scheme as read<T>(). Command selection: WriteReq by
// default; StoreCondReq for locked (SC) stores — handleLockedWrite decides
// via do_access whether the store actually reaches memory; SwapReq for
// swaps, with the comparand stashed in extra data for conditional swaps.
// On completion of a non-swap access the SC result is read back into *res.
// Dispatch order per sub-access: mmaped IPR handler, physmem fast path, or
// dcache port, accumulating dcache_latency. Returns the translation fault.
// NOTE(review): the enclosing loop, the do_access guard around the send,
// early returns, and second-pass pointer/address adjustments (original
// lines 508-511, 528, 530-533, 552-558, 569-586, 590+) are missing from
// this extraction; the XXX comment at 541-543 refers to a loop that is not
// visible here.
486 AtomicSimpleCPU::write(T data
, Addr addr
, unsigned flags
, uint64_t *res
)
488 // use the CPU's statically allocated write request and packet objects
489 Request
*req
= &data_write_req
;
492 traceData
->setAddr(addr
);
495 //The block size of our peer.
496 int blockSize
= dcachePort
.peerBlockSize();
497 //The size of the data we're trying to read.
498 int dataSize
= sizeof(T
);
500 uint8_t * dataPtr
= (uint8_t *)&data
;
502 //The address of the second part of this access if it needs to be split
503 //across a cache line boundary.
504 Addr secondAddr
= roundDown(addr
+ dataSize
- 1, blockSize
);
506 if(secondAddr
> addr
)
507 dataSize
= secondAddr
- addr
;
512 req
->setVirt(0, addr
, dataSize
, flags
, thread
->readPC());
514 // translate to physical address
515 Fault fault
= thread
->translateDataWriteReq(req
);
517 // Now do the access.
518 if (fault
== NoFault
) {
519 MemCmd cmd
= MemCmd::WriteReq
; // default
520 bool do_access
= true; // flag to suppress cache access
522 if (req
->isLocked()) {
523 cmd
= MemCmd::StoreCondReq
;
524 do_access
= TheISA::handleLockedWrite(thread
, req
);
525 } else if (req
->isSwap()) {
526 cmd
= MemCmd::SwapReq
;
527 if (req
->isCondSwap()) {
529 req
->setExtraData(*res
);
534 Packet pkt
= Packet(req
, cmd
, Packet::Broadcast
);
535 pkt
.dataStatic(dataPtr
);
537 if (req
->isMmapedIpr()) {
539 TheISA::handleIprWrite(thread
->getTC(), &pkt
);
541 //XXX This needs to be outside of the loop in order to
542 //work properly for cache line boundary crossing
543 //accesses in transendian simulations.
545 if (hasPhysMemPort
&& pkt
.getAddr() == physMemAddr
)
546 dcache_latency
+= physmemPort
.sendAtomic(&pkt
);
548 dcache_latency
+= dcachePort
.sendAtomic(&pkt
);
550 dcache_access
= true;
551 assert(!pkt
.isError());
559 if (res
&& !req
->isSwap()) {
560 *res
= req
->getExtraData();
564 // This will need a new way to tell if it's hooked up to a cache or not.
565 if (req
->isUncacheable())
566 recordEvent("Uncached Write");
568 //If there's a fault or we don't need to access a second cache line,
570 if (fault
!= NoFault
|| secondAddr
<= addr
)
572 // If the write needs to have a fault on the access, consider
573 // calling changeStatus() and changing it to "bad addr write"
576 traceData
->setData(data
);
582 * Set up for accessing the second cache line.
585 //Move the pointer we're reading into to the correct location.
587 //Adjust the size to get the remaining bytes.
588 dataSize
= addr
+ sizeof(T
) - secondAddr
;
589 //And access the right address.
// Translate a virtual data-write address to physical without performing
// the store; structure parallels translateDataReadAddr() — two translation
// passes for a line-straddling range, with `paddr` set only on the first
// pass. Returns the translation fault, if any.
// NOTE(review): the declaration/initialization of `dataSize` (original
// lines 613-614) and the loop/return scaffolding are missing from this
// extraction; verify against the repo.
595 AtomicSimpleCPU::translateDataWriteAddr(Addr vaddr
, Addr
&paddr
,
596 int size
, unsigned flags
)
598 // use the CPU's statically allocated write request and packet objects
599 Request
*req
= &data_write_req
;
602 traceData
->setAddr(vaddr
);
605 //The block size of our peer.
606 int blockSize
= dcachePort
.peerBlockSize();
608 //The address of the second part of this access if it needs to be split
609 //across a cache line boundary.
610 Addr secondAddr
= roundDown(vaddr
+ size
- 1, blockSize
);
612 //The size of the data we're trying to read.
615 bool firstTimeThrough
= true;
617 if(secondAddr
> vaddr
)
618 dataSize
= secondAddr
- vaddr
;
623 req
->setVirt(0, vaddr
, dataSize
, flags
, thread
->readPC());
625 // translate to physical address
626 Fault fault
= thread
->translateDataWriteReq(req
);
628 //If there's a fault or we don't need to access a second cache line,
630 if (fault
!= NoFault
)
633 if (firstTimeThrough
) {
634 paddr
= req
->getPaddr();
635 firstTimeThrough
= false;
638 if (secondAddr
<= vaddr
)
642 * Set up for accessing the second cache line.
645 //Adjust the size to get the remaining bytes.
646 dataSize
= vaddr
+ size
- secondAddr
;
647 //And access the right address.
// Explicit instantiations of write<T>() for every access width the ISAs
// use, hidden from Doxygen. (The `template Fault` prefixes on the
// originals are among the lines dropped by this extraction.)
653 #ifndef DOXYGEN_SHOULD_SKIP_THIS
657 AtomicSimpleCPU::write(Twin32_t data
, Addr addr
,
658 unsigned flags
, uint64_t *res
);
662 AtomicSimpleCPU::write(Twin64_t data
, Addr addr
,
663 unsigned flags
, uint64_t *res
);
667 AtomicSimpleCPU::write(uint64_t data
, Addr addr
,
668 unsigned flags
, uint64_t *res
);
672 AtomicSimpleCPU::write(uint32_t data
, Addr addr
,
673 unsigned flags
, uint64_t *res
);
677 AtomicSimpleCPU::write(uint16_t data
, Addr addr
,
678 unsigned flags
, uint64_t *res
);
682 AtomicSimpleCPU::write(uint8_t data
, Addr addr
,
683 unsigned flags
, uint64_t *res
);
685 #endif //DOXYGEN_SHOULD_SKIP_THIS
// double overload: reinterpret the 64-bit storage as uint64_t and delegate.
689 AtomicSimpleCPU::write(double data
, Addr addr
, unsigned flags
, uint64_t *res
)
691 return write(*(uint64_t*)&data
, addr
, flags
, res
);
// float overload: reinterpret the 32-bit storage as uint32_t and delegate.
696 AtomicSimpleCPU::write(float data
, Addr addr
, unsigned flags
, uint64_t *res
)
698 return write(*(uint32_t*)&data
, addr
, flags
, res
);
// int32_t overload: value-cast to uint32_t (same bit pattern) and delegate.
704 AtomicSimpleCPU::write(int32_t data
, Addr addr
, unsigned flags
, uint64_t *res
)
706 return write((uint32_t)data
, addr
, flags
, res
);
// Main simulation loop body: executes up to `width` instructions per tick.
// Per iteration: check for interrupts (skipped mid-macroop, i.e. when the
// current static inst is delayed-commit), set up the fetch request, and on
// successful translation fetch the instruction — via the physmem fast path
// when the fetch address lies in physMemAddr, else via the icache port —
// directly into the CPU's `inst` field, then execute it. Stall modelling:
// icache/dcache latencies are accumulated into stall_ticks when the
// corresponding simulate_*_stalls flag is set, rounded UP to a whole cycle
// (aligned_stall_ticks), and added to this tick's latency. Finally the
// tick event is rescheduled at curTick + latency (clamped below to at
// least one cycle) unless the CPU went idle.
// NOTE(review): many scaffolding lines are missing from this extraction
// (original 712, 714-716, 718-719, 722-726, 734, 737, 739, 742, 744,
// 746-753, 755, 758, 761-767, 771-772, 786, 788-794, 797-799) — including
// the preExecute/postExecute calls, instruction counting, and the
// advancePC/idle handling around line 791; treat the flow description
// above as reconstructed context and verify against the repo.
711 AtomicSimpleCPU::tick()
713 DPRINTF(SimpleCPU
, "Tick\n");
717 for (int i
= 0; i
< width
; ++i
) {
720 if (!curStaticInst
|| !curStaticInst
->isDelayedCommit())
721 checkForInterrupts();
725 Fault fault
= setupFetchRequest(&ifetch_req
);
727 if (fault
== NoFault
) {
728 Tick icache_latency
= 0;
729 bool icache_access
= false;
730 dcache_access
= false; // assume no dcache access
732 //Fetch more instruction memory if necessary
733 //if(predecoder.needMoreBytes())
735 icache_access
= true;
736 Packet ifetch_pkt
= Packet(&ifetch_req
, MemCmd::ReadReq
,
738 ifetch_pkt
.dataStatic(&inst
);
740 if (hasPhysMemPort
&& ifetch_pkt
.getAddr() == physMemAddr
)
741 icache_latency
= physmemPort
.sendAtomic(&ifetch_pkt
);
743 icache_latency
= icachePort
.sendAtomic(&ifetch_pkt
);
745 assert(!ifetch_pkt
.isError());
747 // ifetch_req is initialized to read the instruction directly
748 // into the CPU object's inst field.
754 fault
= curStaticInst
->execute(this, traceData
);
756 // keep an instruction count
757 if (fault
== NoFault
)
759 else if (traceData
) {
760 // If there was a fault, we should trace this instruction.
768 // @todo remove me after debugging with legion done
769 if (curStaticInst
&& (!curStaticInst
->isMicroop() ||
770 curStaticInst
->isFirstMicroop()))
773 Tick stall_ticks
= 0;
774 if (simulate_inst_stalls
&& icache_access
)
775 stall_ticks
+= icache_latency
;
777 if (simulate_data_stalls
&& dcache_access
)
778 stall_ticks
+= dcache_latency
;
781 Tick stall_cycles
= stall_ticks
/ ticks(1);
782 Tick aligned_stall_ticks
= ticks(stall_cycles
);
784 if (aligned_stall_ticks
< stall_ticks
)
785 aligned_stall_ticks
+= 1;
787 latency
+= aligned_stall_ticks
;
791 if(fault
!= NoFault
|| !stayAtPC
)
795 // instruction takes at least one cycle
796 if (latency
< ticks(1))
800 tickEvent
.schedule(curTick
+ latency
);
// Debug aid: forward an address to the dcache port's printAddr so the
// memory-system object that owns it can be identified.
805 AtomicSimpleCPU::printAddr(Addr a
)
807 dcachePort
.printAddr(a
);
811 ////////////////////////////////////////////////////////////////////////
813 // AtomicSimpleCPU Simulation Object
816 AtomicSimpleCPUParams::create()
818 AtomicSimpleCPU::Params
*params
= new AtomicSimpleCPU::Params();
820 params
->numberOfThreads
= 1;
821 params
->max_insts_any_thread
= max_insts_any_thread
;
822 params
->max_insts_all_threads
= max_insts_all_threads
;
823 params
->max_loads_any_thread
= max_loads_any_thread
;
824 params
->max_loads_all_threads
= max_loads_all_threads
;
825 params
->progress_interval
= progress_interval
;
826 params
->deferRegistration
= defer_registration
;
827 params
->phase
= phase
;
828 params
->clock
= clock
;
829 params
->functionTrace
= function_trace
;
830 params
->functionTraceStart
= function_trace_start
;
831 params
->width
= width
;
832 params
->simulate_data_stalls
= simulate_data_stalls
;
833 params
->simulate_inst_stalls
= simulate_inst_stalls
;
834 params
->system
= system
;
835 params
->cpu_id
= cpu_id
;
836 params
->tracer
= tracer
;
841 params
->profile
= profile
;
842 params
->do_quiesce
= do_quiesce
;
843 params
->do_checkpoint_insts
= do_checkpoint_insts
;
844 params
->do_statistics_insts
= do_statistics_insts
;
846 if (workload
.size() != 1)
847 panic("only one workload allowed");
848 params
->process
= workload
[0];
851 AtomicSimpleCPU
*cpu
= new AtomicSimpleCPU(params
);