cpu: Add a memory access predicate
[gem5.git] / src / cpu / minor / lsq.cc
/*
 * Copyright (c) 2013-2014,2017 ARM Limited
 * All rights reserved
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder. You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Andrew Bardsley
 */

#include "cpu/minor/lsq.hh"

#include <iomanip>
#include <sstream>

#include "arch/locked_mem.hh"
#include "arch/mmapped_ipr.hh"
#include "base/logging.hh"
#include "cpu/minor/cpu.hh"
#include "cpu/minor/exec_context.hh"
#include "cpu/minor/execute.hh"
#include "cpu/minor/pipeline.hh"
#include "debug/Activity.hh"
#include "debug/MinorMem.hh"

namespace Minor
{

/** Returns the offset of addr into an aligned block of size block_size */
static Addr
addrBlockOffset(Addr addr, unsigned int block_size)
{
    return addr & (block_size - 1);
}

/** Returns true if the given [addr .. addr+size-1] transfer needs to be
 * fragmented across a block size of block_size */
static bool
transferNeedsBurst(Addr addr, unsigned int size, unsigned int block_size)
{
    return (addrBlockOffset(addr, block_size) + size) > block_size;
}
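
/* Illustrative example (editor's addition, not from the original source):
 * with block_size = 64, an 8-byte access at addr 0x3c has
 * addrBlockOffset(0x3c, 64) == 0x3c, and 0x3c + 8 = 68 > 64, so
 * transferNeedsBurst returns true; the same access at 0x38 fits within one
 * block (0x38 + 8 = 64) and returns false. */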

LSQ::LSQRequest::LSQRequest(LSQ &port_, MinorDynInstPtr inst_, bool isLoad_,
    PacketDataPtr data_, uint64_t *res_) :
    SenderState(),
    port(port_),
    inst(inst_),
    isLoad(isLoad_),
    data(data_),
    packet(NULL),
    request(),
    fault(NoFault),
    res(res_),
    skipped(false),
    issuedToMemory(false),
    state(NotIssued)
{
    request = std::make_shared<Request>();
}

LSQ::AddrRangeCoverage
LSQ::LSQRequest::containsAddrRangeOf(
    Addr req1_addr, unsigned int req1_size,
    Addr req2_addr, unsigned int req2_size)
{
    /* 'end' here means the address of the byte just past the request
     * blocks */
    Addr req2_end_addr = req2_addr + req2_size;
    Addr req1_end_addr = req1_addr + req1_size;

    AddrRangeCoverage ret;

    if (req1_addr >= req2_end_addr || req1_end_addr <= req2_addr)
        ret = NoAddrRangeCoverage;
    else if (req1_addr <= req2_addr && req1_end_addr >= req2_end_addr)
        ret = FullAddrRangeCoverage;
    else
        ret = PartialAddrRangeCoverage;

    return ret;
}
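
/* Illustrative example (editor's addition): req1 = [0x100, 0x10f] fully
 * covers req2 = [0x104, 0x107] (FullAddrRangeCoverage), overlaps
 * req2 = [0x108, 0x117] only in part (PartialAddrRangeCoverage), and is
 * disjoint from req2 = [0x110, 0x11f] (NoAddrRangeCoverage). */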

LSQ::AddrRangeCoverage
LSQ::LSQRequest::containsAddrRangeOf(LSQRequestPtr other_request)
{
    return containsAddrRangeOf(request->getPaddr(), request->getSize(),
        other_request->request->getPaddr(), other_request->request->getSize());
}

bool
LSQ::LSQRequest::isBarrier()
{
    return inst->isInst() && inst->staticInst->isMemBarrier();
}

bool
LSQ::LSQRequest::needsToBeSentToStoreBuffer()
{
    return state == StoreToStoreBuffer;
}

void
LSQ::LSQRequest::setState(LSQRequestState new_state)
{
    DPRINTFS(MinorMem, (&port), "Setting state from %d to %d for request:"
        " %s\n", state, new_state, *inst);
    state = new_state;
}

bool
LSQ::LSQRequest::isComplete() const
{
    /* @todo, There is currently only one 'completed' state. This
     * may not be a good choice */
    return state == Complete;
}

void
LSQ::LSQRequest::reportData(std::ostream &os) const
{
    os << (isLoad ? 'R' : 'W') << ';';
    inst->reportData(os);
    os << ';' << state;
}

std::ostream &
operator <<(std::ostream &os, LSQ::AddrRangeCoverage coverage)
{
    switch (coverage) {
      case LSQ::PartialAddrRangeCoverage:
        os << "PartialAddrRangeCoverage";
        break;
      case LSQ::FullAddrRangeCoverage:
        os << "FullAddrRangeCoverage";
        break;
      case LSQ::NoAddrRangeCoverage:
        os << "NoAddrRangeCoverage";
        break;
      default:
        os << "AddrRangeCoverage-" << static_cast<int>(coverage);
        break;
    }
    return os;
}

std::ostream &
operator <<(std::ostream &os, LSQ::LSQRequest::LSQRequestState state)
{
    switch (state) {
      case LSQ::LSQRequest::NotIssued:
        os << "NotIssued";
        break;
      case LSQ::LSQRequest::InTranslation:
        os << "InTranslation";
        break;
      case LSQ::LSQRequest::Translated:
        os << "Translated";
        break;
      case LSQ::LSQRequest::Failed:
        os << "Failed";
        break;
      case LSQ::LSQRequest::RequestIssuing:
        os << "RequestIssuing";
        break;
      case LSQ::LSQRequest::StoreToStoreBuffer:
        os << "StoreToStoreBuffer";
        break;
      case LSQ::LSQRequest::StoreInStoreBuffer:
        os << "StoreInStoreBuffer";
        break;
      case LSQ::LSQRequest::StoreBufferIssuing:
        os << "StoreBufferIssuing";
        break;
      case LSQ::LSQRequest::RequestNeedsRetry:
        os << "RequestNeedsRetry";
        break;
      case LSQ::LSQRequest::StoreBufferNeedsRetry:
        os << "StoreBufferNeedsRetry";
        break;
      case LSQ::LSQRequest::Complete:
        os << "Complete";
        break;
      default:
        os << "LSQRequestState-" << static_cast<int>(state);
        break;
    }
    return os;
}

void
LSQ::clearMemBarrier(MinorDynInstPtr inst)
{
    bool is_last_barrier =
        inst->id.execSeqNum >= lastMemBarrier[inst->id.threadId];

    DPRINTF(MinorMem, "Moving %s barrier out of store buffer inst: %s\n",
        (is_last_barrier ? "last" : "a"), *inst);

    if (is_last_barrier)
        lastMemBarrier[inst->id.threadId] = 0;
}
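
/* Editor's note (summary of the surrounding code): lastMemBarrier records,
 * per thread, the execSeqNum of the youngest memory barrier issued (set in
 * issuedMemBarrierInst later in this file); clearMemBarrier zeroes that
 * entry once the barrier leaves the store buffer. */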

void
LSQ::SingleDataRequest::finish(const Fault &fault_, const RequestPtr &request_,
    ThreadContext *tc, BaseTLB::Mode mode)
{
    fault = fault_;

    port.numAccessesInDTLB--;

    DPRINTFS(MinorMem, (&port), "Received translation response for"
        " request: %s\n", *inst);

    makePacket();

    setState(Translated);
    port.tryToSendToTransfers(this);

    /* Let's try and wake up the processor for the next cycle */
    port.cpu.wakeupOnEvent(Pipeline::ExecuteStageId);
}

void
LSQ::SingleDataRequest::startAddrTranslation()
{
    ThreadContext *thread = port.cpu.getContext(
        inst->id.threadId);

    port.numAccessesInDTLB++;

    setState(LSQ::LSQRequest::InTranslation);

    DPRINTFS(MinorMem, (&port), "Submitting DTLB request\n");
    /* Submit the translation request. The response will come through
     * finish/markDelayed on the LSQRequest as it bears the Translation
     * interface */
    thread->getDTBPtr()->translateTiming(
        request, thread, this, (isLoad ? BaseTLB::Read : BaseTLB::Write));
}

void
LSQ::SingleDataRequest::retireResponse(PacketPtr packet_)
{
    DPRINTFS(MinorMem, (&port), "Retiring packet\n");
    packet = packet_;
    packetInFlight = false;
    setState(Complete);
}

void
LSQ::SplitDataRequest::finish(const Fault &fault_, const RequestPtr &request_,
    ThreadContext *tc, BaseTLB::Mode mode)
{
    fault = fault_;

    port.numAccessesInDTLB--;

    unsigned int M5_VAR_USED expected_fragment_index =
        numTranslatedFragments;

    numInTranslationFragments--;
    numTranslatedFragments++;

    DPRINTFS(MinorMem, (&port), "Received translation response for fragment"
        " %d of request: %s\n", expected_fragment_index, *inst);

    assert(request_ == fragmentRequests[expected_fragment_index]);

    /* Wake up next cycle to get things going again in case the
     * tryToSendToTransfers call below doesn't make progress */
    port.cpu.wakeupOnEvent(Pipeline::ExecuteStageId);

    if (fault != NoFault) {
        /* tryToSendToTransfers will handle the fault */

        DPRINTFS(MinorMem, (&port), "Faulting translation for fragment:"
            " %d of request: %s\n",
            expected_fragment_index, *inst);

        setState(Translated);
        port.tryToSendToTransfers(this);
    } else if (numTranslatedFragments == numFragments) {
        makeFragmentPackets();

        setState(Translated);
        port.tryToSendToTransfers(this);
    } else {
        /* Avoid calling translateTiming from within ::finish */
        assert(!translationEvent.scheduled());
        port.cpu.schedule(translationEvent, curTick());
    }
}

LSQ::SplitDataRequest::SplitDataRequest(LSQ &port_, MinorDynInstPtr inst_,
    bool isLoad_, PacketDataPtr data_, uint64_t *res_) :
    LSQRequest(port_, inst_, isLoad_, data_, res_),
    translationEvent([this]{ sendNextFragmentToTranslation(); },
                     "translationEvent"),
    numFragments(0),
    numInTranslationFragments(0),
    numTranslatedFragments(0),
    numIssuedFragments(0),
    numRetiredFragments(0),
    fragmentRequests(),
    fragmentPackets()
{
    /* Don't know how many elements are needed until the request is
     * populated by the caller. */
}

LSQ::SplitDataRequest::~SplitDataRequest()
{
    for (auto i = fragmentPackets.begin();
         i != fragmentPackets.end(); i++)
    {
        delete *i;
    }
}

void
LSQ::SplitDataRequest::makeFragmentRequests()
{
    Addr base_addr = request->getVaddr();
    unsigned int whole_size = request->getSize();
    unsigned int line_width = port.lineWidth;

    unsigned int fragment_size;
    Addr fragment_addr;

    /* Assume that this transfer is across potentially many block snap
     * boundaries:
     *
     * |      _|________|________|________|___      |
     * |     |0|    1   |    2   |    3   | 4 |     |
     * |     |_|________|________|________|___|     |
     * |      |         |        |        |         |
     *
     * The first transfer (0) can be up to lineWidth in size.
     * All the middle transfers (1-3) are lineWidth in size.
     * The last transfer (4) can be from zero to lineWidth - 1 in size.
     */
    unsigned int first_fragment_offset =
        addrBlockOffset(base_addr, line_width);
    unsigned int last_fragment_size =
        addrBlockOffset(base_addr + whole_size, line_width);
    unsigned int first_fragment_size =
        line_width - first_fragment_offset;

    unsigned int middle_fragments_total_size =
        whole_size - (first_fragment_size + last_fragment_size);

    assert(addrBlockOffset(middle_fragments_total_size, line_width) == 0);

    unsigned int middle_fragment_count =
        middle_fragments_total_size / line_width;

    numFragments = 1 /* first */ + middle_fragment_count +
        (last_fragment_size == 0 ? 0 : 1);

    DPRINTFS(MinorMem, (&port), "Dividing transfer into %d fragmentRequests."
        " First fragment size: %d Last fragment size: %d\n",
        numFragments, first_fragment_size,
        (last_fragment_size == 0 ? line_width : last_fragment_size));

    assert(((middle_fragment_count * line_width) +
        first_fragment_size + last_fragment_size) == whole_size);

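    /* Worked example (editor's addition): for base_addr = 0x1f0,
     * whole_size = 0x30 and line_width = 0x40, first_fragment_offset = 0x30,
     * first_fragment_size = 0x10, last_fragment_size = 0x20 and
     * middle_fragments_total_size = 0, giving numFragments = 2:
     * fragments [0x1f0, 0x1ff] and [0x200, 0x21f]. */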
    fragment_addr = base_addr;
    fragment_size = first_fragment_size;

    /* Just past the last address in the request */
    Addr end_addr = base_addr + whole_size;

    for (unsigned int fragment_index = 0; fragment_index < numFragments;
         fragment_index++)
    {
        bool M5_VAR_USED is_last_fragment = false;

        if (fragment_addr == base_addr) {
            /* First fragment */
            fragment_size = first_fragment_size;
        } else {
            if ((fragment_addr + line_width) > end_addr) {
                /* Adjust size of last fragment */
                fragment_size = end_addr - fragment_addr;
                is_last_fragment = true;
            } else {
                /* Middle fragments */
                fragment_size = line_width;
            }
        }

        RequestPtr fragment = std::make_shared<Request>();

        fragment->setContext(request->contextId());
        fragment->setVirt(0 /* asid */,
            fragment_addr, fragment_size, request->getFlags(),
            request->masterId(),
            request->getPC());

        DPRINTFS(MinorMem, (&port), "Generating fragment addr: 0x%x size: %d"
            " (whole request addr: 0x%x size: %d) %s\n",
            fragment_addr, fragment_size, base_addr, whole_size,
            (is_last_fragment ? "last fragment" : ""));

        fragment_addr += fragment_size;

        fragmentRequests.push_back(fragment);
    }
}

void
LSQ::SplitDataRequest::makeFragmentPackets()
{
    Addr base_addr = request->getVaddr();

    DPRINTFS(MinorMem, (&port), "Making packets for request: %s\n", *inst);

    for (unsigned int fragment_index = 0; fragment_index < numFragments;
         fragment_index++)
    {
        RequestPtr fragment = fragmentRequests[fragment_index];

        DPRINTFS(MinorMem, (&port), "Making packet %d for request: %s"
            " (%s, 0x%x)\n",
            fragment_index, *inst,
            (fragment->hasPaddr() ? "has paddr" : "no paddr"),
            (fragment->hasPaddr() ? fragment->getPaddr() : 0));

        Addr fragment_addr = fragment->getVaddr();
        unsigned int fragment_size = fragment->getSize();

        uint8_t *request_data = NULL;

        if (!isLoad) {
            /* Split data for Packets. Will become the property of the
             * outgoing Packets */
            request_data = new uint8_t[fragment_size];
            std::memcpy(request_data, data + (fragment_addr - base_addr),
                fragment_size);
        }

        assert(fragment->hasPaddr());

        PacketPtr fragment_packet =
            makePacketForRequest(fragment, isLoad, this, request_data);

        fragmentPackets.push_back(fragment_packet);
        /* Accumulate flags in parent request */
        request->setFlags(fragment->getFlags());
    }

    /* Might as well make the overall/response packet here */
    /* Get the physical address for the whole request/packet from the first
     * fragment */
    request->setPaddr(fragmentRequests[0]->getPaddr());
    makePacket();
}

void
LSQ::SplitDataRequest::startAddrTranslation()
{
    setState(LSQ::LSQRequest::InTranslation);

    makeFragmentRequests();

    numInTranslationFragments = 0;
    numTranslatedFragments = 0;

    /* @todo, just do these in sequence for now with
     * a loop of:
     * do {
     *  sendNextFragmentToTranslation ; translateTiming ; finish
     * } while (numTranslatedFragments != numFragments);
     */

    /* Do first translation */
    sendNextFragmentToTranslation();
}

PacketPtr
LSQ::SplitDataRequest::getHeadPacket()
{
    assert(numIssuedFragments < numFragments);

    return fragmentPackets[numIssuedFragments];
}

void
LSQ::SplitDataRequest::stepToNextPacket()
{
    assert(numIssuedFragments < numFragments);

    numIssuedFragments++;
}

void
LSQ::SplitDataRequest::retireResponse(PacketPtr response)
{
    assert(numRetiredFragments < numFragments);

    DPRINTFS(MinorMem, (&port), "Retiring fragment addr: 0x%x size: %d"
        " offset: 0x%x (retired fragment num: %d) %s\n",
        response->req->getVaddr(), response->req->getSize(),
        response->req->getVaddr() - request->getVaddr(),
        numRetiredFragments,
        (fault == NoFault ? "" : fault->name()));

    numRetiredFragments++;

    if (skipped) {
        /* Skip because we already knew the request had faulted or been
         * skipped */
        DPRINTFS(MinorMem, (&port), "Skipping this fragment\n");
    } else if (response->isError()) {
        /* Mark up the error and leave to execute to handle it */
        DPRINTFS(MinorMem, (&port), "Fragment has an error, skipping\n");
        setSkipped();
        packet->copyError(response);
    } else {
        if (isLoad) {
            if (!data) {
                /* For a split transfer, a Packet must be constructed
                 * to contain all returning data. This is that packet's
                 * data */
                data = new uint8_t[request->getSize()];
            }

            /* Populate the portion of the overall response data represented
             * by the response fragment */
            std::memcpy(
                data + (response->req->getVaddr() - request->getVaddr()),
                response->getConstPtr<uint8_t>(),
                response->req->getSize());
        }
    }

    /* Complete early if we're skipping and there are no more in-flight
     * accesses */
    if (skipped && !hasPacketsInMemSystem()) {
        DPRINTFS(MinorMem, (&port), "Completed skipped burst\n");
        setState(Complete);
        if (packet->needsResponse())
            packet->makeResponse();
    }

    if (numRetiredFragments == numFragments)
        setState(Complete);

    if (!skipped && isComplete()) {
        DPRINTFS(MinorMem, (&port), "Completed burst %d\n", packet != NULL);

        DPRINTFS(MinorMem, (&port), "Retired packet isRead: %d isWrite: %d"
            " needsResponse: %d packetSize: %s requestSize: %s responseSize:"
            " %s\n", packet->isRead(), packet->isWrite(),
            packet->needsResponse(), packet->getSize(), request->getSize(),
            response->getSize());

        /* A request can become complete by several paths, this is a sanity
         * check to make sure the packet's data is created */
        if (!data) {
            data = new uint8_t[request->getSize()];
        }

        if (isLoad) {
            DPRINTFS(MinorMem, (&port), "Copying read data\n");
            std::memcpy(packet->getPtr<uint8_t>(), data, request->getSize());
        }
        packet->makeResponse();
    }

    /* Packets are all deallocated together in ~SplitDataRequest */
}

void
LSQ::SplitDataRequest::sendNextFragmentToTranslation()
{
    unsigned int fragment_index = numTranslatedFragments;

    ThreadContext *thread = port.cpu.getContext(
        inst->id.threadId);

    DPRINTFS(MinorMem, (&port), "Submitting DTLB request for fragment: %d\n",
        fragment_index);

    port.numAccessesInDTLB++;
    numInTranslationFragments++;

    thread->getDTBPtr()->translateTiming(
        fragmentRequests[fragment_index], thread, this, (isLoad ?
        BaseTLB::Read : BaseTLB::Write));
}

bool
LSQ::StoreBuffer::canInsert() const
{
    /* @todo, support store amalgamation */
    return slots.size() < numSlots;
}

void
LSQ::StoreBuffer::deleteRequest(LSQRequestPtr request)
{
    auto found = std::find(slots.begin(), slots.end(), request);

    if (found != slots.end()) {
        DPRINTF(MinorMem, "Deleting request: %s %s %s from StoreBuffer\n",
            request, *found, *(request->inst));
        slots.erase(found);

        delete request;
    }
}

void
LSQ::StoreBuffer::insert(LSQRequestPtr request)
{
    if (!canInsert()) {
        warn("%s: store buffer insertion without space to insert from"
            " inst: %s\n", name(), *(request->inst));
    }

    DPRINTF(MinorMem, "Pushing store: %s into store buffer\n", request);

    numUnissuedAccesses++;

    if (request->state != LSQRequest::Complete)
        request->setState(LSQRequest::StoreInStoreBuffer);

    slots.push_back(request);

    /* Let's try and wake up the processor for the next cycle to step
     * the store buffer */
    lsq.cpu.wakeupOnEvent(Pipeline::ExecuteStageId);
}

LSQ::AddrRangeCoverage
LSQ::StoreBuffer::canForwardDataToLoad(LSQRequestPtr request,
    unsigned int &found_slot)
{
    unsigned int slot_index = slots.size() - 1;
    auto i = slots.rbegin();
    AddrRangeCoverage ret = NoAddrRangeCoverage;

    /* Traverse the store buffer in reverse order (most to least recent)
     * and try to find a slot whose address range overlaps this request */
    while (ret == NoAddrRangeCoverage && i != slots.rend()) {
        LSQRequestPtr slot = *i;

        /* Cache maintenance instructions go down via the store path but
         * they carry no data and they shouldn't be considered
         * for forwarding */
        if (slot->packet &&
            slot->inst->id.threadId == request->inst->id.threadId &&
            !slot->packet->req->isCacheMaintenance()) {
            AddrRangeCoverage coverage = slot->containsAddrRangeOf(request);

            if (coverage != NoAddrRangeCoverage) {
                DPRINTF(MinorMem, "Forwarding: slot: %d result: %s thisAddr:"
                    " 0x%x thisSize: %d slotAddr: 0x%x slotSize: %d\n",
                    slot_index, coverage,
                    request->request->getPaddr(), request->request->getSize(),
                    slot->request->getPaddr(), slot->request->getSize());

                found_slot = slot_index;
                ret = coverage;
            }
        }

        i++;
        slot_index--;
    }

    return ret;
}

/** Fill the given packet with appropriate data from slot slot_number */
void
LSQ::StoreBuffer::forwardStoreData(LSQRequestPtr load,
    unsigned int slot_number)
{
    assert(slot_number < slots.size());
    assert(load->packet);
    assert(load->isLoad);

    LSQRequestPtr store = slots[slot_number];

    assert(store->packet);
    assert(store->containsAddrRangeOf(load) == FullAddrRangeCoverage);

    Addr load_addr = load->request->getPaddr();
    Addr store_addr = store->request->getPaddr();
    Addr addr_offset = load_addr - store_addr;

    unsigned int load_size = load->request->getSize();

    DPRINTF(MinorMem, "Forwarding %d bytes for addr: 0x%x from store buffer"
        " slot: %d addr: 0x%x addressOffset: 0x%x\n",
        load_size, load_addr, slot_number,
        store_addr, addr_offset);

    void *load_packet_data = load->packet->getPtr<void>();
    void *store_packet_data = store->packet->getPtr<uint8_t>() + addr_offset;

    std::memcpy(load_packet_data, store_packet_data, load_size);
}
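
/* Illustrative example (editor's addition): an 8-byte store at 0x1000
 * sitting in slot 2 fully covers a 4-byte load at 0x1004, so
 * addr_offset = 0x4 and the four bytes are copied from the middle of the
 * store packet's data into the load's packet. */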

void
LSQ::StoreBuffer::countIssuedStore(LSQRequestPtr request)
{
    /* Barriers are accounted for as they are cleared from
     * the queue, not after their transfers are complete */
    if (!request->isBarrier())
        numUnissuedAccesses--;
}

void
LSQ::StoreBuffer::step()
{
    DPRINTF(MinorMem, "StoreBuffer step numUnissuedAccesses: %d\n",
        numUnissuedAccesses);

    if (numUnissuedAccesses != 0 && lsq.state == LSQ::MemoryRunning) {
        /* Clear all the leading barriers */
        while (!slots.empty() &&
            slots.front()->isComplete() && slots.front()->isBarrier())
        {
            LSQRequestPtr barrier = slots.front();

            DPRINTF(MinorMem, "Clearing barrier for inst: %s\n",
                *(barrier->inst));

            numUnissuedAccesses--;
            lsq.clearMemBarrier(barrier->inst);
            slots.pop_front();

            delete barrier;
        }

        auto i = slots.begin();
        bool issued = true;
        unsigned int issue_count = 0;

        /* Skip trying if the memory system is busy */
        if (lsq.state == LSQ::MemoryNeedsRetry)
            issued = false;

        /* Try to issue all stores in order starting from the head
         * of the queue. Responses are allowed to be retired
         * out of order */
        while (issued &&
            issue_count < storeLimitPerCycle &&
            lsq.canSendToMemorySystem() &&
            i != slots.end())
        {
            LSQRequestPtr request = *i;

            DPRINTF(MinorMem, "Considering request: %s, sentAllPackets: %d"
                " state: %s\n",
                *(request->inst), request->sentAllPackets(),
                request->state);

            if (request->isBarrier() && request->isComplete()) {
                /* Give up at barriers */
                issued = false;
            } else if (!(request->state == LSQRequest::StoreBufferIssuing &&
                request->sentAllPackets()))
            {
                DPRINTF(MinorMem, "Trying to send request: %s to memory"
                    " system\n", *(request->inst));

                if (lsq.tryToSend(request)) {
                    countIssuedStore(request);
                    issue_count++;
                } else {
                    /* Don't step on to the next store buffer entry if this
                     * one hasn't issued all its packets as the store
                     * buffer must still enforce ordering */
                    issued = false;
                }
            }
            i++;
        }
    }
}

void
LSQ::completeMemBarrierInst(MinorDynInstPtr inst,
    bool committed)
{
    if (committed) {
        /* Not already sent to the store buffer as a store request? */
        if (!inst->inStoreBuffer) {
            /* Insert an entry into the store buffer to tick off barriers
             * until there are none in flight */
            storeBuffer.insert(new BarrierDataRequest(*this, inst));
        }
    } else {
        /* Clear the barrier anyway if it wasn't actually committed */
        clearMemBarrier(inst);
    }
}

void
LSQ::StoreBuffer::minorTrace() const
{
    unsigned int size = slots.size();
    unsigned int i = 0;
    std::ostringstream os;

    while (i < size) {
        LSQRequestPtr request = slots[i];

        request->reportData(os);

        i++;
        if (i < numSlots)
            os << ',';
    }

    while (i < numSlots) {
        os << '-';

        i++;
        if (i < numSlots)
            os << ',';
    }

    MINORTRACE("addr=%s num_unissued_stores=%d\n", os.str(),
        numUnissuedAccesses);
}

void
LSQ::tryToSendToTransfers(LSQRequestPtr request)
{
    if (state == MemoryNeedsRetry) {
        DPRINTF(MinorMem, "Request needs retry, not issuing to"
            " memory until retry arrives\n");
        return;
    }

    if (request->state == LSQRequest::InTranslation) {
        DPRINTF(MinorMem, "Request still in translation, not issuing to"
            " memory\n");
        return;
    }

    assert(request->state == LSQRequest::Translated ||
        request->state == LSQRequest::RequestIssuing ||
        request->state == LSQRequest::Failed ||
        request->state == LSQRequest::Complete);

    if (requests.empty() || requests.front() != request) {
        DPRINTF(MinorMem, "Request not at front of requests queue, can't"
            " issue to memory\n");
        return;
    }

    if (transfers.unreservedRemainingSpace() == 0) {
        DPRINTF(MinorMem, "No space to insert request into transfers"
            " queue\n");
        return;
    }

    if (request->isComplete() || request->state == LSQRequest::Failed) {
        DPRINTF(MinorMem, "Passing a %s transfer on to transfers"
            " queue\n", (request->isComplete() ? "completed" : "failed"));
        request->setState(LSQRequest::Complete);
        request->setSkipped();
        moveFromRequestsToTransfers(request);
        return;
    }

    if (!execute.instIsRightStream(request->inst)) {
        /* Wrong stream, try to abort the transfer but only do so if
         * there are no packets in flight */
        if (request->hasPacketsInMemSystem()) {
            DPRINTF(MinorMem, "Request's inst. is from the wrong stream,"
                " waiting for responses before aborting request\n");
        } else {
            DPRINTF(MinorMem, "Request's inst. is from the wrong stream,"
                " aborting request\n");
            request->setState(LSQRequest::Complete);
            request->setSkipped();
            moveFromRequestsToTransfers(request);
        }
        return;
    }

    if (request->fault != NoFault) {
        if (request->inst->staticInst->isPrefetch()) {
            DPRINTF(MinorMem, "Not signalling fault for faulting prefetch\n");
        }
        DPRINTF(MinorMem, "Moving faulting request into the transfers"
            " queue\n");
        request->setState(LSQRequest::Complete);
        request->setSkipped();
        moveFromRequestsToTransfers(request);
        return;
    }

    bool is_load = request->isLoad;
    bool is_llsc = request->request->isLLSC();
    bool is_swap = request->request->isSwap();
    bool is_atomic = request->request->isAtomic();
    bool bufferable = !(request->request->isStrictlyOrdered() ||
        is_llsc || is_swap || is_atomic);

    if (is_load) {
        if (numStoresInTransfers != 0) {
            DPRINTF(MinorMem, "Load request with stores still in transfers"
                " queue, stalling\n");
            return;
        }
    } else {
        /* Store. Can it be sent to the store buffer? */
        if (bufferable && !request->request->isMmappedIpr()) {
            request->setState(LSQRequest::StoreToStoreBuffer);
            moveFromRequestsToTransfers(request);
            DPRINTF(MinorMem, "Moving store into transfers queue\n");
            return;
        }
    }

    /* Check if this is the head instruction (and so must be executable as
     * its stream sequence number was checked above) for loads which must
     * not be speculatively issued and stores which must be issued here */
    if (!bufferable) {
        if (!execute.instIsHeadInst(request->inst)) {
            DPRINTF(MinorMem, "Memory access not the head inst., can't be"
                " sure it can be performed, not issuing\n");
            return;
        }

        unsigned int forwarding_slot = 0;

        if (storeBuffer.canForwardDataToLoad(request, forwarding_slot) !=
            NoAddrRangeCoverage)
        {
            // There's at least another request that targets the same
            // address and is staying in the storeBuffer. Since our
            // request is non-bufferable (e.g., strictly ordered or atomic),
            // we must wait for the other request in the storeBuffer to
            // complete before we can issue this non-bufferable request.
            // This is to make sure that the order they access the cache is
            // correct.
            DPRINTF(MinorMem, "Memory access can receive forwarded data"
                " from the store buffer, but need to wait for store buffer"
                " to drain\n");
            return;
        }
    }

    /* True: submit this packet to the transfers queue to be sent to the
     * memory system.
     * False: skip the memory and push a packet for this request onto
     * requests */
    bool do_access = true;

    if (!is_llsc) {
        /* Check for match in the store buffer */
        if (is_load) {
            unsigned int forwarding_slot = 0;
            AddrRangeCoverage forwarding_result =
                storeBuffer.canForwardDataToLoad(request,
                forwarding_slot);

            switch (forwarding_result) {
              case FullAddrRangeCoverage:
                /* Forward data from the store buffer into this request and
                 * repurpose this request's packet into a response packet */
                storeBuffer.forwardStoreData(request, forwarding_slot);
                request->packet->makeResponse();

                /* Just move between queues, no access */
                do_access = false;
                break;
              case PartialAddrRangeCoverage:
                DPRINTF(MinorMem, "Load partly satisfied by store buffer"
                    " data. Must wait for the store to complete\n");
                return;
                break;
              case NoAddrRangeCoverage:
                DPRINTF(MinorMem, "No forwardable data from store buffer\n");
                /* Fall through to try access */
                break;
            }
        }
    } else {
        if (!canSendToMemorySystem()) {
            DPRINTF(MinorMem, "Can't send request to memory system yet\n");
            return;
        }

        SimpleThread &thread = *cpu.threads[request->inst->id.threadId];

        TheISA::PCState old_pc = thread.pcState();
        ExecContext context(cpu, thread, execute, request->inst);

        /* Handle LLSC requests and tests */
        if (is_load) {
            TheISA::handleLockedRead(&context, request->request);
        } else {
            do_access = TheISA::handleLockedWrite(&context,
                request->request, cacheBlockMask);

            if (!do_access) {
                DPRINTF(MinorMem, "Not performing a memory "
                    "access for store conditional\n");
            }
        }
        thread.pcState(old_pc);
    }

    /* See the do_access comment above */
    if (do_access) {
        if (!canSendToMemorySystem()) {
            DPRINTF(MinorMem, "Can't send request to memory system yet\n");
            return;
        }

        /* Remember if this is an access which can't be idly
         * discarded by an interrupt */
        if (!bufferable && !request->issuedToMemory) {
            numAccessesIssuedToMemory++;
            request->issuedToMemory = true;
        }

        if (tryToSend(request)) {
            moveFromRequestsToTransfers(request);
        }
    } else {
        request->setState(LSQRequest::Complete);
        moveFromRequestsToTransfers(request);
    }
}
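
/* Editor's summary (informal): a request leaves tryToSendToTransfers in one
 * of four ways: stalled (translation pending, queue full, load behind
 * stores, or a non-bufferable access that isn't the head instruction),
 * completed without a memory access (fault, wrong stream, full store-buffer
 * forward, or a failed store conditional), moved on as a bufferable store
 * destined for the store buffer, or actually issued via tryToSend. */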

bool
LSQ::tryToSend(LSQRequestPtr request)
{
    bool ret = false;

    if (!canSendToMemorySystem()) {
        DPRINTF(MinorMem, "Can't send request: %s yet, no space in memory\n",
            *(request->inst));
    } else {
        PacketPtr packet = request->getHeadPacket();

        DPRINTF(MinorMem, "Trying to send request: %s addr: 0x%x\n",
            *(request->inst), packet->req->getVaddr());

        /* The sender state of the packet *must* be an LSQRequest
         * so the response can be correctly handled */
        assert(packet->findNextSenderState<LSQRequest>());

        if (request->request->isMmappedIpr()) {
            ThreadContext *thread =
                cpu.getContext(cpu.contextToThread(
                    request->request->contextId()));

            if (request->isLoad) {
                DPRINTF(MinorMem, "IPR read inst: %s\n", *(request->inst));
                TheISA::handleIprRead(thread, packet);
            } else {
                DPRINTF(MinorMem, "IPR write inst: %s\n", *(request->inst));
                TheISA::handleIprWrite(thread, packet);
            }

            request->stepToNextPacket();
            ret = request->sentAllPackets();

            if (!ret) {
                DPRINTF(MinorMem, "IPR access has another packet: %s\n",
                    *(request->inst));
            }

            if (ret)
                request->setState(LSQRequest::Complete);
            else
                request->setState(LSQRequest::RequestIssuing);
        } else if (dcachePort.sendTimingReq(packet)) {
            DPRINTF(MinorMem, "Sent data memory request\n");

            numAccessesInMemorySystem++;

            request->stepToNextPacket();

            ret = request->sentAllPackets();

            switch (request->state) {
              case LSQRequest::Translated:
              case LSQRequest::RequestIssuing:
                /* Fully or partially issued a request in the transfers
                 * queue */
                request->setState(LSQRequest::RequestIssuing);
                break;
              case LSQRequest::StoreInStoreBuffer:
              case LSQRequest::StoreBufferIssuing:
                /* Fully or partially issued a request in the store
                 * buffer */
                request->setState(LSQRequest::StoreBufferIssuing);
                break;
              default:
                panic("Unrecognized LSQ request state %d.", request->state);
            }

            state = MemoryRunning;
        } else {
            DPRINTF(MinorMem,
                "Sending data memory request - needs retry\n");

            /* Needs to be resent, wait for that */
            state = MemoryNeedsRetry;
            retryRequest = request;

            switch (request->state) {
              case LSQRequest::Translated:
              case LSQRequest::RequestIssuing:
                request->setState(LSQRequest::RequestNeedsRetry);
                break;
              case LSQRequest::StoreInStoreBuffer:
              case LSQRequest::StoreBufferIssuing:
                request->setState(LSQRequest::StoreBufferNeedsRetry);
                break;
              default:
                panic("Unrecognized LSQ request state %d.", request->state);
            }
        }
    }

    if (ret)
        threadSnoop(request);

    return ret;
}

void
LSQ::moveFromRequestsToTransfers(LSQRequestPtr request)
{
    assert(!requests.empty() && requests.front() == request);
    assert(transfers.unreservedRemainingSpace() != 0);

    /* Need to count the number of stores in the transfers
     * queue so that loads know when their store buffer forwarding
     * results will be correct (only when all those stores
     * have reached the store buffer) */
    if (!request->isLoad)
        numStoresInTransfers++;

    requests.pop();
    transfers.push(request);
}

bool
LSQ::canSendToMemorySystem()
{
    return state == MemoryRunning &&
        numAccessesInMemorySystem < inMemorySystemLimit;
}

bool
LSQ::recvTimingResp(PacketPtr response)
{
    LSQRequestPtr request =
        safe_cast<LSQRequestPtr>(response->popSenderState());

    DPRINTF(MinorMem, "Received response packet inst: %s"
        " addr: 0x%x cmd: %s\n",
        *(request->inst), response->getAddr(),
        response->cmd.toString());

    numAccessesInMemorySystem--;

    if (response->isError()) {
        DPRINTF(MinorMem, "Received error response packet: %s\n",
            *request->inst);
    }

    switch (request->state) {
      case LSQRequest::RequestIssuing:
      case LSQRequest::RequestNeedsRetry:
        /* Response to a request from the transfers queue */
        request->retireResponse(response);

        DPRINTF(MinorMem, "Has outstanding packets?: %d %d\n",
            request->hasPacketsInMemSystem(), request->isComplete());

        break;
      case LSQRequest::StoreBufferIssuing:
      case LSQRequest::StoreBufferNeedsRetry:
        /* Response to a request from the store buffer */
        request->retireResponse(response);

        /* Remove completed requests unless they are barriers (which will
         * need to be removed in order) */
        if (request->isComplete()) {
            if (!request->isBarrier()) {
                storeBuffer.deleteRequest(request);
            } else {
                DPRINTF(MinorMem, "Completed transfer for barrier: %s"
                    " leaving the request as it is also a barrier\n",
                    *(request->inst));
            }
        }
        break;
      default:
        panic("Shouldn't be allowed to receive a response from another state");
    }

    /* We go to idle even if there are more things in the requests queue
     * as it's the job of step to actually step us on to the next
     * transaction */

    /* Let's try and wake up the processor for the next cycle */
    cpu.wakeupOnEvent(Pipeline::ExecuteStageId);

    /* Never busy */
    return true;
}

void
LSQ::recvReqRetry()
{
    DPRINTF(MinorMem, "Received retry request\n");

    assert(state == MemoryNeedsRetry);

    switch (retryRequest->state) {
      case LSQRequest::RequestNeedsRetry:
        /* Retry in the requests queue */
        retryRequest->setState(LSQRequest::Translated);
        break;
      case LSQRequest::StoreBufferNeedsRetry:
        /* Retry in the store buffer */
        retryRequest->setState(LSQRequest::StoreInStoreBuffer);
        break;
      default:
        panic("Unrecognized retry request state %d.", retryRequest->state);
    }

    /* Set state back to MemoryRunning so that the following
     * tryToSend can actually send. Note that this won't
     * allow another transfer in as tryToSend should
     * issue a memory request and either succeed for this
     * request or return the LSQ back to MemoryNeedsRetry */
    state = MemoryRunning;

    /* Try to resend the request */
    if (tryToSend(retryRequest)) {
        /* Successfully sent, need to move the request */
        switch (retryRequest->state) {
          case LSQRequest::RequestIssuing:
            /* In the requests queue */
            moveFromRequestsToTransfers(retryRequest);
            break;
          case LSQRequest::StoreBufferIssuing:
            /* In the store buffer */
            storeBuffer.countIssuedStore(retryRequest);
            break;
          default:
            panic("Unrecognized retry request state %d.", retryRequest->state);
        }

        retryRequest = NULL;
    }
}

LSQ::LSQ(std::string name_, std::string dcache_port_name_,
    MinorCPU &cpu_, Execute &execute_,
    unsigned int in_memory_system_limit, unsigned int line_width,
    unsigned int requests_queue_size, unsigned int transfers_queue_size,
    unsigned int store_buffer_size,
    unsigned int store_buffer_cycle_store_limit) :
    Named(name_),
    cpu(cpu_),
    execute(execute_),
    dcachePort(dcache_port_name_, *this, cpu_),
    lastMemBarrier(cpu.numThreads, 0),
    state(MemoryRunning),
    inMemorySystemLimit(in_memory_system_limit),
    lineWidth((line_width == 0 ? cpu.cacheLineSize() : line_width)),
    requests(name_ + ".requests", "addr", requests_queue_size),
    transfers(name_ + ".transfers", "addr", transfers_queue_size),
    storeBuffer(name_ + ".storeBuffer",
        *this, store_buffer_size, store_buffer_cycle_store_limit),
    numAccessesInMemorySystem(0),
    numAccessesInDTLB(0),
    numStoresInTransfers(0),
    numAccessesIssuedToMemory(0),
    retryRequest(NULL),
    cacheBlockMask(~(cpu_.cacheLineSize() - 1))
{
    if (in_memory_system_limit < 1) {
        fatal("%s: executeMaxAccessesInMemory must be >= 1 (%d)\n", name_,
            in_memory_system_limit);
    }

    if (store_buffer_cycle_store_limit < 1) {
        fatal("%s: executeLSQMaxStoreBufferStoresPerCycle must be"
            " >= 1 (%d)\n", name_, store_buffer_cycle_store_limit);
    }

    if (requests_queue_size < 1) {
        fatal("%s: executeLSQRequestsQueueSize must be"
            " >= 1 (%d)\n", name_, requests_queue_size);
    }

    if (transfers_queue_size < 1) {
        fatal("%s: executeLSQTransfersQueueSize must be"
            " >= 1 (%d)\n", name_, transfers_queue_size);
    }

    if (store_buffer_size < 1) {
        fatal("%s: executeLSQStoreBufferSize must be"
            " >= 1 (%d)\n", name_, store_buffer_size);
    }

    if ((lineWidth & (lineWidth - 1)) != 0) {
        fatal("%s: lineWidth: %d must be a power of 2\n", name(), lineWidth);
    }
}

LSQ::~LSQ()
{ }

LSQ::LSQRequest::~LSQRequest()
{
    if (packet)
        delete packet;
    if (data)
        delete [] data;
}

/**
 * Step the memory access mechanism on to its next state. In reality, most
 * of the stepping is done by the callbacks on the LSQ but this
 * function is responsible for issuing memory requests lodged in the
 * requests queue.
 */
void
LSQ::step()
{
    /* Try to move address-translated requests between queues and issue
     * them */
    if (!requests.empty())
        tryToSendToTransfers(requests.front());

    storeBuffer.step();
}

LSQ::LSQRequestPtr
LSQ::findResponse(MinorDynInstPtr inst)
{
    LSQ::LSQRequestPtr ret = NULL;

    if (!transfers.empty()) {
        LSQRequestPtr request = transfers.front();

        /* Same instruction and complete access or a store that's
         * capable of being moved to the store buffer */
        if (request->inst->id == inst->id) {
            bool complete = request->isComplete();
            bool can_store = storeBuffer.canInsert();
            bool to_store_buffer = request->state ==
                LSQRequest::StoreToStoreBuffer;

            if ((complete && !(request->isBarrier() && !can_store)) ||
                (to_store_buffer && can_store))
            {
                ret = request;
            }
        }
    }

    if (ret) {
        DPRINTF(MinorMem, "Found matching memory response for inst: %s\n",
            *inst);
    } else {
        DPRINTF(MinorMem, "No matching memory response for inst: %s\n",
            *inst);
    }

    return ret;
}

void
LSQ::popResponse(LSQ::LSQRequestPtr response)
{
    assert(!transfers.empty() && transfers.front() == response);

    transfers.pop();

    if (!response->isLoad)
        numStoresInTransfers--;

    if (response->issuedToMemory)
        numAccessesIssuedToMemory--;

    if (response->state != LSQRequest::StoreInStoreBuffer) {
        DPRINTF(MinorMem, "Deleting %s request: %s\n",
            (response->isLoad ? "load" : "store"),
            *(response->inst));

        delete response;
    }
}

void
LSQ::sendStoreToStoreBuffer(LSQRequestPtr request)
{
    assert(request->state == LSQRequest::StoreToStoreBuffer);

    DPRINTF(MinorMem, "Sending store: %s to store buffer\n",
        *(request->inst));

    request->inst->inStoreBuffer = true;

    storeBuffer.insert(request);
}

bool
LSQ::isDrained()
{
    return requests.empty() && transfers.empty() &&
        storeBuffer.isDrained();
}

bool
LSQ::needsToTick()
{
    bool ret = false;

    if (canSendToMemorySystem()) {
        bool have_translated_requests = !requests.empty() &&
            requests.front()->state != LSQRequest::InTranslation &&
            transfers.unreservedRemainingSpace() != 0;

        ret = have_translated_requests ||
            storeBuffer.numUnissuedStores() != 0;
    }

    if (ret)
        DPRINTF(Activity, "Need to tick\n");

    return ret;
}

void
LSQ::pushRequest(MinorDynInstPtr inst, bool isLoad, uint8_t *data,
    unsigned int size, Addr addr, Request::Flags flags,
    uint64_t *res, AtomicOpFunctor *amo_op)
{
    bool needs_burst = transferNeedsBurst(addr, size, lineWidth);

    if (needs_burst && inst->staticInst->isAtomic()) {
        // AMO requests that access across a cache line boundary are not
        // allowed since the cache does not guarantee AMO ops to be executed
        // atomically in two cache lines
        // For ISAs such as x86 that require AMO operations to work on
        // accesses that cross cache-line boundaries, the cache needs to be
        // modified to support locking both cache lines to guarantee the
        // atomicity.
        panic("Do not expect cross-cache-line atomic memory request\n");
    }

    LSQRequestPtr request;

    /* Copy given data into the request. The request will pass this to the
     * packet and then it will own the data */
    uint8_t *request_data = NULL;

    DPRINTF(MinorMem, "Pushing request (%s) addr: 0x%x size: %d flags:"
        " 0x%x%s lineWidth : 0x%x\n",
        (isLoad ? "load" : "store/atomic"), addr, size, flags,
        (needs_burst ? " (needs burst)" : ""), lineWidth);

    if (!isLoad) {
        /* request_data becomes the property of a ...DataRequest (see below)
         * and destroyed by its destructor */
        request_data = new uint8_t[size];
        if (inst->staticInst->isAtomic() ||
            (flags & Request::STORE_NO_DATA)) {
            /* For atomic or store-no-data, just use zeroed data */
            std::memset(request_data, 0, size);
        } else {
            std::memcpy(request_data, data, size);
        }
    }

    if (needs_burst) {
        request = new SplitDataRequest(
            *this, inst, isLoad, request_data, res);
    } else {
        request = new SingleDataRequest(
            *this, inst, isLoad, request_data, res);
    }

    if (inst->traceData)
        inst->traceData->setMem(addr, size, flags);

    int cid = cpu.threads[inst->id.threadId]->getTC()->contextId();
    request->request->setContext(cid);
    request->request->setVirt(0 /* asid */,
        addr, size, flags, cpu.dataMasterId(),
        /* I've no idea why we need the PC, but give it */
        inst->pc.instAddr(), amo_op);

    requests.push(request);
    request->startAddrTranslation();
}
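
/* Editor's summary (informal) of the request lifecycle: pushRequest builds
 * a Single- or SplitDataRequest and starts translation; finish() marks it
 * Translated and calls tryToSendToTransfers; requests then either issue to
 * memory, or (for bufferable stores) pass through the transfers queue to
 * the store buffer; recvTimingResp retires responses and Execute collects
 * them via findResponse/popResponse. */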

void
LSQ::pushFailedRequest(MinorDynInstPtr inst)
{
    LSQRequestPtr request = new FailedDataRequest(*this, inst);
    requests.push(request);
}

void
LSQ::minorTrace() const
{
    MINORTRACE("state=%s in_tlb_mem=%d/%d stores_in_transfers=%d"
        " lastMemBarrier=%d\n",
        state, numAccessesInDTLB, numAccessesInMemorySystem,
        numStoresInTransfers, lastMemBarrier[0]);
    requests.minorTrace();
    transfers.minorTrace();
    storeBuffer.minorTrace();
}

LSQ::StoreBuffer::StoreBuffer(std::string name_, LSQ &lsq_,
    unsigned int store_buffer_size,
    unsigned int store_limit_per_cycle) :
    Named(name_), lsq(lsq_),
    numSlots(store_buffer_size),
    storeLimitPerCycle(store_limit_per_cycle),
    slots(),
    numUnissuedAccesses(0)
{
}

PacketPtr
makePacketForRequest(const RequestPtr &request, bool isLoad,
    Packet::SenderState *sender_state, PacketDataPtr data)
{
    PacketPtr ret = isLoad ? Packet::createRead(request)
        : Packet::createWrite(request);

    if (sender_state)
        ret->pushSenderState(sender_state);

    if (isLoad) {
        ret->allocate();
    } else if (!request->isCacheMaintenance()) {
        // CMOs are treated as stores but they don't have data. All
        // stores otherwise need to allocate for data.
        ret->dataDynamic(data);
    }

    return ret;
}

void
LSQ::issuedMemBarrierInst(MinorDynInstPtr inst)
{
    assert(inst->isInst() && inst->staticInst->isMemBarrier());
    assert(inst->id.execSeqNum > lastMemBarrier[inst->id.threadId]);

    /* Remember the barrier. We only have a notion of one
     * barrier so this may result in some mem refs being
     * delayed if they are between barriers */
    lastMemBarrier[inst->id.threadId] = inst->id.execSeqNum;
}

void
LSQ::LSQRequest::makePacket()
{
    /* Make the function idempotent */
    if (packet)
        return;

    // if the translation faulted, do not create a packet
    if (fault != NoFault) {
        assert(packet == NULL);
        return;
    }

    packet = makePacketForRequest(request, isLoad, this, data);
    /* Null the data pointer so we know not to deallocate it when this
     * request is destroyed. The data now belongs to the packet and
     * the packet is responsible for its destruction */
    data = NULL;
}

std::ostream &
operator <<(std::ostream &os, LSQ::MemoryState state)
{
    switch (state) {
      case LSQ::MemoryRunning:
        os << "MemoryRunning";
        break;
      case LSQ::MemoryNeedsRetry:
        os << "MemoryNeedsRetry";
        break;
      default:
        os << "MemoryState-" << static_cast<int>(state);
        break;
    }
    return os;
}

void
LSQ::recvTimingSnoopReq(PacketPtr pkt)
{
    /* LLSC operations in Minor can't be speculative and are executed from
     * the head of the requests queue. We shouldn't need to do more than
     * this action on snoops. */
    for (ThreadID tid = 0; tid < cpu.numThreads; tid++) {
        if (cpu.getCpuAddrMonitor(tid)->doMonitor(pkt)) {
            cpu.wakeup(tid);
        }
    }

    if (pkt->isInvalidate() || pkt->isWrite()) {
        for (ThreadID tid = 0; tid < cpu.numThreads; tid++) {
            TheISA::handleLockedSnoop(cpu.getContext(tid), pkt,
                cacheBlockMask);
        }
    }
}

void
LSQ::threadSnoop(LSQRequestPtr request)
{
    /* LLSC operations in Minor can't be speculative and are executed from
     * the head of the requests queue. We shouldn't need to do more than
     * this action on snoops. */
    ThreadID req_tid = request->inst->id.threadId;
    PacketPtr pkt = request->packet;

    for (ThreadID tid = 0; tid < cpu.numThreads; tid++) {
        if (tid != req_tid) {
            if (cpu.getCpuAddrMonitor(tid)->doMonitor(pkt)) {
                cpu.wakeup(tid);
            }

            if (pkt->isInvalidate() || pkt->isWrite()) {
                TheISA::handleLockedSnoop(cpu.getContext(tid), pkt,
                    cacheBlockMask);
            }
        }
    }
}

}