src/cpu/simple/atomic.cc
/*
 * Copyright 2014 Google, Inc.
 * Copyright (c) 2012-2013,2015 ARM Limited
 * All rights reserved.
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder. You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Copyright (c) 2002-2005 The Regents of The University of Michigan
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Steve Reinhardt
 */

#include "arch/locked_mem.hh"
#include "arch/mmapped_ipr.hh"
#include "arch/utility.hh"
#include "base/bigint.hh"
#include "base/output.hh"
#include "config/the_isa.hh"
#include "cpu/simple/atomic.hh"
#include "cpu/exetrace.hh"
#include "debug/Drain.hh"
#include "debug/ExecFaulting.hh"
#include "debug/SimpleCPU.hh"
#include "mem/packet.hh"
#include "mem/packet_access.hh"
#include "mem/physical.hh"
#include "params/AtomicSimpleCPU.hh"
#include "sim/faults.hh"
#include "sim/system.hh"
#include "sim/full_system.hh"

using namespace std;
using namespace TheISA;

AtomicSimpleCPU::TickEvent::TickEvent(AtomicSimpleCPU *c)
    : Event(CPU_Tick_Pri), cpu(c)
{
}


void
AtomicSimpleCPU::TickEvent::process()
{
    cpu->tick();
}

const char *
AtomicSimpleCPU::TickEvent::description() const
{
    return "AtomicSimpleCPU tick";
}

void
AtomicSimpleCPU::init()
{
    BaseSimpleCPU::init();

    int cid = threadContexts[0]->contextId();
    ifetch_req.setThreadContext(cid, 0);
    data_read_req.setThreadContext(cid, 0);
    data_write_req.setThreadContext(cid, 0);
}

AtomicSimpleCPU::AtomicSimpleCPU(AtomicSimpleCPUParams *p)
    : BaseSimpleCPU(p), tickEvent(this), width(p->width), locked(false),
      simulate_data_stalls(p->simulate_data_stalls),
      simulate_inst_stalls(p->simulate_inst_stalls),
      icachePort(name() + ".icache_port", this),
      dcachePort(name() + ".dcache_port", this),
      fastmem(p->fastmem), dcache_access(false), dcache_latency(0),
      ppCommit(nullptr)
{
    _status = Idle;
}


AtomicSimpleCPU::~AtomicSimpleCPU()
{
    if (tickEvent.scheduled()) {
        deschedule(tickEvent);
    }
}

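// Drain the CPU so it can be switched out or checkpointed. A
// switched-out CPU is drained by definition; otherwise we can only
// report Drained once any partially executed (micro-)instruction has
// committed, which is what isDrained() checks.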
DrainState
AtomicSimpleCPU::drain()
{
    if (switchedOut())
        return DrainState::Drained;

    if (!isDrained()) {
        DPRINTF(Drain, "Requesting drain.\n");
        return DrainState::Draining;
    } else {
        if (tickEvent.scheduled())
            deschedule(tickEvent);

        activeThreads.clear();
        DPRINTF(Drain, "Not executing microcode, no need to drain.\n");
        return DrainState::Drained;
    }
}

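// Propagate a write performed by one hardware thread to its siblings
// on this CPU: wake any thread whose monitored (mwait) address is
// touched and clear LLSC reservations on the written cache block.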
void
AtomicSimpleCPU::threadSnoop(PacketPtr pkt, ThreadID sender)
{
    DPRINTF(SimpleCPU, "received snoop pkt for addr:%#x %s\n", pkt->getAddr(),
            pkt->cmdString());

    for (ThreadID tid = 0; tid < numThreads; tid++) {
        if (tid != sender) {
            if (getCpuAddrMonitor(tid)->doMonitor(pkt)) {
                wakeup(tid);
            }

            TheISA::handleLockedSnoop(threadInfo[tid]->thread,
                                      pkt, dcachePort.cacheBlockMask);
        }
    }
}

void
AtomicSimpleCPU::drainResume()
{
    assert(!tickEvent.scheduled());
    if (switchedOut())
        return;

    DPRINTF(SimpleCPU, "Resume\n");
    verifyMemoryMode();

    assert(!threadContexts.empty());

    _status = BaseSimpleCPU::Idle;

    for (ThreadID tid = 0; tid < numThreads; tid++) {
        if (threadInfo[tid]->thread->status() == ThreadContext::Active) {
            threadInfo[tid]->notIdleFraction = 1;
            activeThreads.push_back(tid);
            _status = BaseSimpleCPU::Running;

            // Tick if any threads active
            if (!tickEvent.scheduled()) {
                schedule(tickEvent, nextCycle());
            }
        } else {
            threadInfo[tid]->notIdleFraction = 0;
        }
    }
}

bool
AtomicSimpleCPU::tryCompleteDrain()
{
    if (drainState() != DrainState::Draining)
        return false;

    DPRINTF(Drain, "tryCompleteDrain.\n");
    if (!isDrained())
        return false;

    DPRINTF(Drain, "CPU done draining, processing drain event\n");
    signalDrainDone();

    return true;
}


void
AtomicSimpleCPU::switchOut()
{
    BaseSimpleCPU::switchOut();

    assert(!tickEvent.scheduled());
    assert(_status == BaseSimpleCPU::Running || _status == Idle);
    assert(isDrained());
}


void
AtomicSimpleCPU::takeOverFrom(BaseCPU *oldCPU)
{
    BaseSimpleCPU::takeOverFrom(oldCPU);

    // The tick event should have been descheduled by drain()
    assert(!tickEvent.scheduled());
}

void
AtomicSimpleCPU::verifyMemoryMode() const
{
    if (!system->isAtomicMode()) {
        fatal("The atomic CPU requires the memory system to be in "
              "'atomic' mode.\n");
    }
}

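// Mark a thread as runnable: account for the cycles that elapsed while
// it was suspended, add it to the active list, and make sure the tick
// event is scheduled on a clock edge.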
void
AtomicSimpleCPU::activateContext(ThreadID thread_num)
{
    DPRINTF(SimpleCPU, "ActivateContext %d\n", thread_num);

    assert(thread_num < numThreads);

    threadInfo[thread_num]->notIdleFraction = 1;
    Cycles delta = ticksToCycles(threadInfo[thread_num]->thread->lastActivate -
                                 threadInfo[thread_num]->thread->lastSuspend);
    numCycles += delta;
    ppCycles->notify(delta);

    if (!tickEvent.scheduled()) {
        // Make sure ticks are still on multiples of cycles
        schedule(tickEvent, clockEdge(Cycles(0)));
    }
    _status = BaseSimpleCPU::Running;
    if (std::find(activeThreads.begin(), activeThreads.end(), thread_num)
        == activeThreads.end()) {
        activeThreads.push_back(thread_num);
    }
}


void
AtomicSimpleCPU::suspendContext(ThreadID thread_num)
{
    DPRINTF(SimpleCPU, "SuspendContext %d\n", thread_num);

    assert(thread_num < numThreads);
    activeThreads.remove(thread_num);

    if (_status == Idle)
        return;

    assert(_status == BaseSimpleCPU::Running);

    threadInfo[thread_num]->notIdleFraction = 0;

    if (activeThreads.empty()) {
        _status = Idle;

        if (tickEvent.scheduled()) {
            deschedule(tickEvent);
        }
    }
}


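// Handle an atomic snoop from the memory system: wake any thread whose
// monitored (mwait) address is touched and, for invalidations and
// writes, clear LLSC reservations on the affected cache block.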
Tick
AtomicSimpleCPU::AtomicCPUDPort::recvAtomicSnoop(PacketPtr pkt)
{
    DPRINTF(SimpleCPU, "received snoop pkt for addr:%#x %s\n", pkt->getAddr(),
            pkt->cmdString());

    // X86 ISA: Snooping an invalidation for monitor/mwait
    AtomicSimpleCPU *cpu = (AtomicSimpleCPU *)(&owner);

    for (ThreadID tid = 0; tid < cpu->numThreads; tid++) {
        if (cpu->getCpuAddrMonitor(tid)->doMonitor(pkt)) {
            cpu->wakeup(tid);
        }
    }

    // If the snoop invalidates, release any associated locks. When
    // running without caches, invalidation packets will never be
    // received, so we also have to check for incoming writes and wake
    // the processor up accordingly.
    if (pkt->isInvalidate() || pkt->isWrite()) {
        DPRINTF(SimpleCPU, "received invalidation for addr:%#x\n",
                pkt->getAddr());
        for (auto &t_info : cpu->threadInfo) {
            TheISA::handleLockedSnoop(t_info->thread, pkt, cacheBlockMask);
        }
    }

    return 0;
}

void
AtomicSimpleCPU::AtomicCPUDPort::recvFunctionalSnoop(PacketPtr pkt)
{
    DPRINTF(SimpleCPU, "received snoop pkt for addr:%#x %s\n", pkt->getAddr(),
            pkt->cmdString());

    // X86 ISA: Snooping an invalidation for monitor/mwait
    AtomicSimpleCPU *cpu = (AtomicSimpleCPU *)(&owner);
    for (ThreadID tid = 0; tid < cpu->numThreads; tid++) {
        if (cpu->getCpuAddrMonitor(tid)->doMonitor(pkt)) {
            cpu->wakeup(tid);
        }
    }

    // If the snoop invalidates, release any associated locks
    if (pkt->isInvalidate()) {
        DPRINTF(SimpleCPU, "received invalidation for addr:%#x\n",
                pkt->getAddr());
        for (auto &t_info : cpu->threadInfo) {
            TheISA::handleLockedSnoop(t_info->thread, pkt, cacheBlockMask);
        }
    }
}

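// Atomically read 'size' bytes at virtual address 'addr' into 'data'.
// An access that crosses a cache line boundary is split in two; the
// second iteration of the loop below handles the remainder.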
Fault
AtomicSimpleCPU::readMem(Addr addr, uint8_t * data,
                         unsigned size, unsigned flags)
{
    SimpleExecContext& t_info = *threadInfo[curThread];
    SimpleThread* thread = t_info.thread;

    // use the CPU's statically allocated read request and packet objects
    Request *req = &data_read_req;

    if (traceData)
        traceData->setMem(addr, size, flags);

    // The size of the data we're trying to read.
    int fullSize = size;

    // The address of the second part of this access if it needs to be split
    // across a cache line boundary.
    Addr secondAddr = roundDown(addr + size - 1, cacheLineSize());

    if (secondAddr > addr)
        size = secondAddr - addr;

    dcache_latency = 0;

    req->taskId(taskId());
    while (1) {
        req->setVirt(0, addr, size, flags, dataMasterId(),
                     thread->pcState().instAddr());

        // translate to physical address
        Fault fault = thread->dtb->translateAtomic(req, thread->getTC(),
                                                   BaseTLB::Read);

        // Now do the access.
        if (fault == NoFault && !req->getFlags().isSet(Request::NO_ACCESS)) {
            Packet pkt(req, Packet::makeReadCmd(req));
            pkt.dataStatic(data);

            if (req->isMmappedIpr())
                dcache_latency += TheISA::handleIprRead(thread->getTC(), &pkt);
            else {
                if (fastmem && system->isMemAddr(pkt.getAddr()))
                    system->getPhysMem().access(&pkt);
                else
                    dcache_latency += dcachePort.sendAtomic(&pkt);
            }
            dcache_access = true;

            assert(!pkt.isError());

            if (req->isLLSC()) {
                TheISA::handleLockedRead(thread, req);
            }
        }

        // If there's a fault, return it; a faulting prefetch is
        // dropped and completes without a fault.
        if (fault != NoFault) {
            if (req->isPrefetch()) {
                return NoFault;
            } else {
                return fault;
            }
        }

        // If we don't need to access a second cache line, stop now.
        if (secondAddr <= addr)
        {
            if (req->isLockedRMW() && fault == NoFault) {
                assert(!locked);
                locked = true;
            }

            return fault;
        }

        /*
         * Set up for accessing the second cache line.
         */

        // Move the destination pointer to the correct location.
        data += size;
        // Adjust the size to get the remaining bytes.
        size = addr + fullSize - secondAddr;
        // And access the right address.
        addr = secondAddr;
    }
}

Fault
AtomicSimpleCPU::initiateMemRead(Addr addr, unsigned size, unsigned flags)
{
    panic("initiateMemRead() is for timing accesses, and should "
          "never be called on AtomicSimpleCPU.\n");
}

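// Atomically write 'size' bytes from 'data' to virtual address 'addr',
// mirroring the split-access loop in readMem(). LLSC stores and
// (conditional) swaps are issued with the matching memory commands.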
Fault
AtomicSimpleCPU::writeMem(uint8_t *data, unsigned size,
                          Addr addr, unsigned flags, uint64_t *res)
{
    SimpleExecContext& t_info = *threadInfo[curThread];
    SimpleThread* thread = t_info.thread;
    static uint8_t zero_array[64] = {};

    if (data == NULL) {
        assert(size <= 64);
        assert(flags & Request::CACHE_BLOCK_ZERO);
        // This must be a cache block cleaning request
        data = zero_array;
    }

    // use the CPU's statically allocated write request and packet objects
    Request *req = &data_write_req;

    if (traceData)
        traceData->setMem(addr, size, flags);

    // The size of the data we're trying to write.
    int fullSize = size;

    // The address of the second part of this access if it needs to be split
    // across a cache line boundary.
    Addr secondAddr = roundDown(addr + size - 1, cacheLineSize());

    if (secondAddr > addr)
        size = secondAddr - addr;

    dcache_latency = 0;

    req->taskId(taskId());
    while (1) {
        req->setVirt(0, addr, size, flags, dataMasterId(),
                     thread->pcState().instAddr());

        // translate to physical address
        Fault fault = thread->dtb->translateAtomic(req, thread->getTC(),
                                                   BaseTLB::Write);

        // Now do the access.
        if (fault == NoFault) {
            MemCmd cmd = MemCmd::WriteReq; // default
            bool do_access = true;  // flag to suppress cache access

            if (req->isLLSC()) {
                cmd = MemCmd::StoreCondReq;
                do_access = TheISA::handleLockedWrite(thread, req,
                        dcachePort.cacheBlockMask);
            } else if (req->isSwap()) {
                cmd = MemCmd::SwapReq;
                if (req->isCondSwap()) {
                    assert(res);
                    req->setExtraData(*res);
                }
            }

            if (do_access && !req->getFlags().isSet(Request::NO_ACCESS)) {
                Packet pkt = Packet(req, cmd);
                pkt.dataStatic(data);

                if (req->isMmappedIpr()) {
                    dcache_latency +=
                        TheISA::handleIprWrite(thread->getTC(), &pkt);
                } else {
                    if (fastmem && system->isMemAddr(pkt.getAddr()))
                        system->getPhysMem().access(&pkt);
                    else
                        dcache_latency += dcachePort.sendAtomic(&pkt);

                    // Notify other threads on this CPU of write
                    threadSnoop(&pkt, curThread);
                }
                dcache_access = true;
                assert(!pkt.isError());

                if (req->isSwap()) {
                    assert(res);
                    memcpy(res, pkt.getConstPtr<uint8_t>(), fullSize);
                }
            }

            if (res && !req->isSwap()) {
                *res = req->getExtraData();
            }
        }

        // If there's a fault or we don't need to access a second cache
        // line, stop now.
        if (fault != NoFault || secondAddr <= addr)
        {
            if (req->isLockedRMW() && fault == NoFault) {
                assert(locked);
                locked = false;
            }

            if (fault != NoFault && req->isPrefetch()) {
                return NoFault;
            } else {
                return fault;
            }
        }

        /*
         * Set up for accessing the second cache line.
         */

        // Move the source pointer to the correct location.
        data += size;
        // Adjust the size to get the remaining bytes.
        size = addr + fullSize - secondAddr;
        // And access the right address.
        addr = secondAddr;
    }
}


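// Main simulation loop: execute up to 'width' instructions per cycle
// on the current thread (fetch, translate, and execute each one in a
// single atomic call), then reschedule the tick event, padding the
// latency with any simulated icache/dcache stall time.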
void
AtomicSimpleCPU::tick()
{
    DPRINTF(SimpleCPU, "Tick\n");

    // Change thread if multi-threaded
    swapActiveThread();

    // Set memory request ids to current thread
    if (numThreads > 1) {
        ContextID cid = threadContexts[curThread]->contextId();

        ifetch_req.setThreadContext(cid, curThread);
        data_read_req.setThreadContext(cid, curThread);
        data_write_req.setThreadContext(cid, curThread);
    }

    SimpleExecContext& t_info = *threadInfo[curThread];
    SimpleThread* thread = t_info.thread;

    Tick latency = 0;

    for (int i = 0; i < width || locked; ++i) {
        numCycles++;
        ppCycles->notify(1);

        if (!curStaticInst || !curStaticInst->isDelayedCommit()) {
            checkForInterrupts();
            checkPcEventQueue();
        }

        // We must have just got suspended by a PC event
        if (_status == Idle) {
            tryCompleteDrain();
            return;
        }

        Fault fault = NoFault;

        TheISA::PCState pcState = thread->pcState();

        bool needToFetch = !isRomMicroPC(pcState.microPC()) &&
                           !curMacroStaticInst;
        if (needToFetch) {
            ifetch_req.taskId(taskId());
            setupFetchRequest(&ifetch_req);
            fault = thread->itb->translateAtomic(&ifetch_req, thread->getTC(),
                                                 BaseTLB::Execute);
        }

        if (fault == NoFault) {
            Tick icache_latency = 0;
            bool icache_access = false;
            dcache_access = false; // assume no dcache access

            if (needToFetch) {
                // This is commented out because the decoder would act like
                // a tiny cache otherwise. It wouldn't be flushed when needed
                // like the I cache. It should be flushed, and when that works
                // this code should be uncommented.
                //Fetch more instruction memory if necessary
                //if (decoder.needMoreBytes())
                //{
                icache_access = true;
                Packet ifetch_pkt = Packet(&ifetch_req, MemCmd::ReadReq);
                ifetch_pkt.dataStatic(&inst);

                if (fastmem && system->isMemAddr(ifetch_pkt.getAddr()))
                    system->getPhysMem().access(&ifetch_pkt);
                else
                    icache_latency = icachePort.sendAtomic(&ifetch_pkt);

                assert(!ifetch_pkt.isError());

                // ifetch_req is initialized to read the instruction directly
                // into the CPU object's inst field.
                //}
            }

            preExecute();

            if (curStaticInst) {
                fault = curStaticInst->execute(&t_info, traceData);

                // keep an instruction count
                if (fault == NoFault) {
                    countInst();
                    ppCommit->notify(std::make_pair(thread, curStaticInst));
                } else if (traceData && !DTRACE(ExecFaulting)) {
                    delete traceData;
                    traceData = NULL;
                }

                postExecute();
            }

            // @todo remove me after debugging with legion done
            if (curStaticInst && (!curStaticInst->isMicroop() ||
                                  curStaticInst->isFirstMicroop()))
                instCnt++;

            Tick stall_ticks = 0;
            if (simulate_inst_stalls && icache_access)
                stall_ticks += icache_latency;

            if (simulate_data_stalls && dcache_access)
                stall_ticks += dcache_latency;

            if (stall_ticks) {
                // the atomic cpu does its accounting in ticks, so
                // keep counting in ticks but round to the clock
                // period
                latency += divCeil(stall_ticks, clockPeriod()) *
                           clockPeriod();
            }
        }
        if (fault != NoFault || !t_info.stayAtPC)
            advancePC(fault);
    }

    if (tryCompleteDrain())
        return;

    // instruction takes at least one cycle
    if (latency < clockPeriod())
        latency = clockPeriod();

    if (_status != Idle)
        reschedule(tickEvent, curTick() + latency, true);
}

void
AtomicSimpleCPU::regProbePoints()
{
    BaseCPU::regProbePoints();

    ppCommit = new ProbePointArg<pair<SimpleThread*, const StaticInstPtr>>
        (getProbeManager(), "Commit");
}

void
AtomicSimpleCPU::printAddr(Addr a)
{
    dcachePort.printAddr(a);
}

////////////////////////////////////////////////////////////////////////
//
//  AtomicSimpleCPU Simulation Object
//
AtomicSimpleCPU *
AtomicSimpleCPUParams::create()
{
    return new AtomicSimpleCPU(this);
}