gpu-compute: Create CU's ports in the standard way
src/gpu-compute/fetch_unit.cc
/*
 * Copyright (c) 2014-2017 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "gpu-compute/fetch_unit.hh"

#include "debug/GPUFetch.hh"
#include "debug/GPUPort.hh"
#include "debug/GPUTLB.hh"
#include "gpu-compute/compute_unit.hh"
#include "gpu-compute/gpu_dyn_inst.hh"
#include "gpu-compute/gpu_static_inst.hh"
#include "gpu-compute/shader.hh"
#include "gpu-compute/wavefront.hh"
#include "mem/ruby/system/RubySystem.hh"

uint32_t FetchUnit::globalFetchUnitID;

FetchUnit::FetchUnit(const ComputeUnitParams *p, ComputeUnit &cu)
    : timingSim(true), computeUnit(cu), fetchScheduler(p),
      waveList(nullptr), fetchDepth(p->fetch_depth)
{
}

FetchUnit::~FetchUnit()
{
    fetchQueue.clear();
    fetchStatusQueue.clear();
}

void
FetchUnit::init()
{
    timingSim = computeUnit.shader->timingSim;
    fetchQueue.clear();
    fetchStatusQueue.resize(computeUnit.shader->n_wf);
    fetchBuf.resize(computeUnit.shader->n_wf, FetchBufDesc());

    for (int i = 0; i < computeUnit.shader->n_wf; ++i) {
        Wavefront *wf = waveList->at(i);
        assert(wf->wfSlotId == i);
        fetchStatusQueue[i] = std::make_pair(wf, false);
        fetchBuf[i].allocateBuf(fetchDepth, computeUnit.cacheLineSize(), wf);
        fetchBuf[i].decoder(&decoder);
    }

    fetchScheduler.bindList(&fetchQueue);
}

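/**
 * one pass of the fetch stage. a sketch of the flow, as implemented
 * below:
 *
 *   1) for each wavefront's fetch buffer, try to release the oldest
 *      buffered line (only checked when the buffer is full) and decode
 *      any buffered bytes into the wavefront's instruction buffer;
 *   2) mark waves that have become ready to fetch and add them to
 *      fetchQueue;
 *   3) let fetchScheduler pick one ready wave and initiate its fetch.
 */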
void
FetchUnit::exec()
{
    /**
     * first we check if any of the fetch buffer entries can be
     * released; we only check this when a buffer has no free space.
     * then we check if any of the fetch buffers have buffered
     * instruction data that can be decoded and sent to its
     * wavefront's instruction buffer.
     */
    for (auto &fetch_buf : fetchBuf) {
        if (!fetch_buf.hasFreeSpace()) {
            fetch_buf.checkWaveReleaseBuf();
        }
        if (fetch_buf.hasFetchDataToProcess()) {
            fetch_buf.decodeInsts();
        }
    }

    // re-evaluate waves which are marked as not ready for fetch
    for (int j = 0; j < computeUnit.shader->n_wf; ++j) {
        // the following code assumes 64-bit operation and that all
        // insts are represented by 64-bit pointers to inst objects.
        Wavefront *curWave = fetchStatusQueue[j].first;
        assert(curWave);

        // the wavefront has to be active, its fetch buffer must have
        // free space, and it cannot have a pending branch or an
        // outstanding fetch; this prevents speculative instruction
        // fetches
        if (!fetchStatusQueue[j].second) {
            if ((curWave->getStatus() == Wavefront::S_RUNNING ||
                curWave->getStatus() == Wavefront::S_WAITCNT) &&
                fetchBuf[j].hasFreeSpace() &&
                !curWave->stopFetch() &&
                !curWave->pendingFetch) {
                fetchQueue.push_back(curWave);
                fetchStatusQueue[j].second = true;
            }
        }
    }

    // fetch only if there is some wave ready to be fetched;
    // an empty fetchQueue would cause the scheduler to panic
    if (fetchQueue.size()) {
        Wavefront *waveToBeFetched = fetchScheduler.chooseWave();
        waveToBeFetched->pendingFetch = true;
        fetchStatusQueue[waveToBeFetched->wfSlotId].second = false;
        initiateFetch(waveToBeFetched);
    }
}

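/**
 * start a fetch for @p wavefront: compute the vaddr of the next line
 * to fetch, reserve a fetch buffer entry for it, and send the
 * translation request. in timing mode the request goes out on the
 * ITLB port (sqcTLBPort) and fetch() runs later, when translation
 * completes; in functional mode both the translation and the fetch
 * itself complete before this call returns.
 */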
void
FetchUnit::initiateFetch(Wavefront *wavefront)
{
    assert(fetchBuf.at(wavefront->wfSlotId).hasFreeSpace());

    /**
     * calculate the virtual address to fetch from the SQC. the fetch
     * buffer holds a configurable number of cache lines. we start
     * fetching at the address of the cache line immediately following
     * the buffered line(s).
     */
    Addr vaddr = fetchBuf.at(wavefront->wfSlotId).nextFetchAddr();

    // this should already be aligned to a cache line
    assert(vaddr == makeLineAddress(vaddr,
           computeUnit.getCacheLineBits()));

    // shouldn't be fetching a line that is already buffered
    assert(!fetchBuf.at(wavefront->wfSlotId).pcBuffered(vaddr));

    fetchBuf.at(wavefront->wfSlotId).reserveBuf(vaddr);

    DPRINTF(GPUFetch, "CU%d: WF[%d][%d]: Id%d: Initiate fetch "
            "from pc: %d %#x\n", computeUnit.cu_id, wavefront->simdId,
            wavefront->wfSlotId, wavefront->wfDynId, wavefront->pc(), vaddr);

    DPRINTF(GPUTLB, "CU%d: WF[%d][%d]: Initiating fetch translation: %#x\n",
            computeUnit.cu_id, wavefront->simdId, wavefront->wfSlotId, vaddr);

    // set up virtual request
    RequestPtr req = std::make_shared<Request>(
        vaddr, computeUnit.cacheLineSize(), Request::INST_FETCH,
        computeUnit.masterId(), 0, 0, nullptr);

    PacketPtr pkt = new Packet(req, MemCmd::ReadReq);

    if (timingSim) {
        // SenderState needed on return
        pkt->senderState = new ComputeUnit::ITLBPort::SenderState(wavefront);

        // SenderState needed by the TLB hierarchy
        pkt->senderState =
            new TheISA::GpuTLB::TranslationState(BaseTLB::Execute,
                                                 computeUnit.shader->gpuTc,
                                                 false, pkt->senderState);

        if (computeUnit.sqcTLBPort.isStalled()) {
            assert(computeUnit.sqcTLBPort.retries.size() > 0);

            DPRINTF(GPUTLB, "Failed to send TLB req for FETCH addr %#x\n",
                    vaddr);

            computeUnit.sqcTLBPort.retries.push_back(pkt);
        } else if (!computeUnit.sqcTLBPort.sendTimingReq(pkt)) {
            // stall the TLB port; no more packets are issued until
            // Ruby indicates that resources have been freed via a
            // recvReqRetry() callback on this port.
            computeUnit.sqcTLBPort.stallPort();

            DPRINTF(GPUTLB, "Failed to send TLB req for FETCH addr %#x\n",
                    vaddr);

            computeUnit.sqcTLBPort.retries.push_back(pkt);
        } else {
            DPRINTF(GPUTLB, "sent FETCH translation request for %#x\n", vaddr);
        }
    } else {
        pkt->senderState =
            new TheISA::GpuTLB::TranslationState(BaseTLB::Execute,
                                                 computeUnit.shader->gpuTc);

        computeUnit.sqcTLBPort.sendFunctional(pkt);

        TheISA::GpuTLB::TranslationState *sender_state =
            safe_cast<TheISA::GpuTLB::TranslationState*>(pkt->senderState);

        delete sender_state->tlbEntry;
        delete sender_state;
        // fetch the instructions from the SQC when we operate in
        // functional mode only
        fetch(pkt, wavefront);
    }
}

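/**
 * second half of a fetch (and the tail end of a functional one):
 * translation has finished, so @p pkt now carries a physical address.
 * here we rebuild the packet, point its data at the reserved fetch
 * buffer entry, and send the read request to the SQC.
 */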
void
FetchUnit::fetch(PacketPtr pkt, Wavefront *wavefront)
{
    assert(pkt->req->hasPaddr());
    assert(pkt->req->hasSize());

    DPRINTF(GPUFetch, "CU%d: WF[%d][%d]: Fetch Access: %#x\n",
            computeUnit.cu_id, wavefront->simdId, wavefront->wfSlotId,
            pkt->req->getPaddr());

    /**
     * this is necessary because the GPU TLB receives packets instead of
     * requests. when the translation is complete, all relevant fields in
     * the request will be populated, but not in the packet. here we create
     * the new packet so we can set the size, addr, and proper flags.
     */
    PacketPtr oldPkt = pkt;
    pkt = new Packet(oldPkt->req, oldPkt->cmd);
    delete oldPkt;

    /**
     * if we have not reserved an entry in the fetch buffer, stop
     * fetching. this can happen when a branch instruction flushes the
     * fetch buffer while an ITLB or I-cache request is still pending,
     * and in the same cycle another instruction tries to fetch.
     */
    if (!fetchBuf.at(wavefront->wfSlotId).isReserved(pkt->req->getVaddr())) {
        return;
    }

    /**
     * we should have reserved an entry in the fetch buffer
     * for this cache line. here we get the pointer to the
     * entry used to buffer this request's line data.
     */
    pkt->dataStatic(fetchBuf.at(wavefront->wfSlotId)
                    .reservedBuf(pkt->req->getVaddr()));

    // new SenderState for the memory access
    pkt->senderState = new ComputeUnit::SQCPort::SenderState(wavefront);

    if (timingSim) {
        // translation is done. send the appropriate timing memory request.

        if (!computeUnit.sqcPort.sendTimingReq(pkt)) {
            computeUnit.sqcPort.retries.push_back(std::make_pair(pkt,
                                                                 wavefront));

            DPRINTF(GPUPort, "CU%d: WF[%d][%d]: Fetch addr %#x failed!\n",
                    computeUnit.cu_id, wavefront->simdId, wavefront->wfSlotId,
                    pkt->req->getPaddr());
        } else {
            DPRINTF(GPUPort, "CU%d: WF[%d][%d]: Fetch addr %#x sent!\n",
                    computeUnit.cu_id, wavefront->simdId, wavefront->wfSlotId,
                    pkt->req->getPaddr());
        }
    } else {
        computeUnit.sqcPort.sendFunctional(pkt);
        processFetchReturn(pkt);
    }
}

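/**
 * SQC response handler. marks the reserved fetch buffer entry as valid
 * via fetchDone(), unless the wave flushed its buffer while this fetch
 * was in flight (dropFetch), in which case the returned line is simply
 * discarded.
 */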
void
FetchUnit::processFetchReturn(PacketPtr pkt)
{
    ComputeUnit::SQCPort::SenderState *sender_state =
        safe_cast<ComputeUnit::SQCPort::SenderState*>(pkt->senderState);

    Wavefront *wavefront = sender_state->wavefront;

    DPRINTF(GPUFetch, "CU%d: WF[%d][%d]: Fetch addr %#x returned "
            "%d bytes!\n", computeUnit.cu_id, wavefront->simdId,
            wavefront->wfSlotId, pkt->req->getPaddr(), pkt->req->getSize());

    if (wavefront->dropFetch) {
        assert(wavefront->instructionBuffer.empty());
        assert(!fetchBuf.at(wavefront->wfSlotId).hasFetchDataToProcess());
        wavefront->dropFetch = false;
    } else {
        fetchBuf.at(wavefront->wfSlotId).fetchDone(pkt->req->getVaddr());
    }

    wavefront->pendingFetch = false;

    delete pkt->senderState;
    delete pkt;
}

void
FetchUnit::flushBuf(int wfSlotId)
{
    fetchBuf.at(wfSlotId).flushBuf();
}

void
FetchUnit::bindWaveList(std::vector<Wavefront*> *wave_list)
{
    waveList = wave_list;
}

/** FetchBufDesc */
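/**
 * size a wavefront's fetch buffer. the buffer is one contiguous array
 * of fetchDepth cache-line-sized slots that the decoder consumes
 * circularly (readPtr wraps from bufEnd back to bufStart). as an
 * illustration, with a 64 B line and fetch_depth = 2 the buffer is
 * 128 B and can hold two lines in flight.
 */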
void
FetchUnit::FetchBufDesc::allocateBuf(int fetch_depth, int cache_line_size,
                                     Wavefront *wf)
{
    wavefront = wf;
    fetchDepth = fetch_depth;
    maxIbSize = wavefront->maxIbSize;
    cacheLineSize = cache_line_size;
    maxFbSize = cacheLineSize * fetchDepth;

    // calculate the number of bits to address a cache line
    panic_if(!isPowerOf2(cacheLineSize),
             "Cache line size should be a power of two.");
    cacheLineBits = floorLog2(cacheLineSize);

    bufStart = new uint8_t[maxFbSize];
    readPtr = bufStart;
    bufEnd = bufStart + maxFbSize;

    for (int i = 0; i < fetchDepth; ++i) {
        freeList.emplace_back(readPtr + i * cacheLineSize);
    }
}

void
FetchUnit::FetchBufDesc::flushBuf()
{
    restartFromBranch = true;
    /**
     * the free list may have some entries,
     * so we clear it here to avoid duplicates
     */
    freeList.clear();
    bufferedPCs.clear();
    reservedPCs.clear();
    readPtr = bufStart;

    for (int i = 0; i < fetchDepth; ++i) {
        freeList.push_back(bufStart + i * cacheLineSize);
    }

    DPRINTF(GPUFetch, "WF[%d][%d]: Id%d Fetch dropped, flushing fetch "
            "buffer\n", wavefront->simdId, wavefront->wfSlotId,
            wavefront->wfDynId);
}

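/**
 * return the line-aligned vaddr the next fetch should target: one line
 * past the newest buffered (or reserved) line, or, when nothing is
 * buffered, the line holding the wavefront's current PC. after a
 * branch-induced flush the read pointer is also offset so that
 * decoding resumes mid-line at the branch target.
 */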
Addr
FetchUnit::FetchBufDesc::nextFetchAddr()
{
    Addr next_line = 0;

    if (bufferedAndReservedLines()) {
        Addr last_line_fetched = 0;
        if (!reservedLines()) {
            /**
             * get the PC of the most recently fetched cache line,
             * then return the address of the next line.
             */
            last_line_fetched = bufferedPCs.rbegin()->first;
        } else {
            last_line_fetched = reservedPCs.rbegin()->first;
        }

        next_line = last_line_fetched + cacheLineSize;

        /**
         * should not be trying to fetch a line that has already
         * been fetched.
         */
        assert(bufferedPCs.find(next_line) == bufferedPCs.end());
        assert(reservedPCs.find(next_line) == reservedPCs.end());
    } else {
        /**
         * we do not have any buffered cache lines yet, so we
         * assume this is the initial fetch, or the first fetch
         * after a branch, and get the PC directly from the WF.
         * in the case of a branch, we may not start at the
         * beginning of a cache line, so we adjust the readPtr by
         * the current PC's offset from the start of the line.
         */
        next_line = makeLineAddress(wavefront->pc(), cacheLineBits);
        readPtr = bufStart;

        /**
         * if we are here we have no buffered lines. in the case we
         * flushed the buffer due to a branch, we may need to start
         * fetching from some offset from the start of the fetch
         * buffer, so we adjust for that here.
         */
        if (restartFromBranch) {
            restartFromBranch = false;
            int byte_offset
                = wavefront->pc() - makeLineAddress(wavefront->pc(),
                                                    cacheLineBits);
            readPtr += byte_offset;
        }
    }

    return next_line;
}

void
FetchUnit::FetchBufDesc::reserveBuf(Addr vaddr)
{
    // we should have free buffer space, and the line
    // at vaddr should not already be cached.
    assert(hasFreeSpace());
    assert(bufferedPCs.find(vaddr) == bufferedPCs.end());
    assert(reservedPCs.find(vaddr) == reservedPCs.end());
    assert(bufferedAndReservedLines() < fetchDepth);

    DPRINTF(GPUFetch, "WF[%d][%d]: Id%d reserved fetch buffer entry "
            "for PC = %#x\n", wavefront->simdId, wavefront->wfSlotId,
            wavefront->wfDynId, vaddr);

    /**
     * we reserve buffer space by moving it out of the
     * free list, however we do not mark the buffered
     * line as valid until the fetch unit for this buffer
     * has received the response from the memory system.
     */
    uint8_t *inst_buf = freeList.front();
    reservedPCs.emplace(vaddr, inst_buf);
    freeList.pop_front();
}

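/**
 * move a line from the reserved map to the buffered map once the SQC
 * response has filled it, making its bytes visible to the decoder.
 * only the data pointer moves from reservedPCs to bufferedPCs; the
 * underlying buffer slot itself does not change.
 */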
void
FetchUnit::FetchBufDesc::fetchDone(Addr vaddr)
{
    assert(bufferedPCs.find(vaddr) == bufferedPCs.end());
    DPRINTF(GPUFetch, "WF[%d][%d]: Id%d done fetching for addr %#x\n",
            wavefront->simdId, wavefront->wfSlotId,
            wavefront->wfDynId, vaddr);

    /**
     * this address should have an entry reserved in the
     * fetch buffer already, however it should be invalid
     * until the fetch completes.
     */
    auto reserved_pc = reservedPCs.find(vaddr);
    assert(reserved_pc != reservedPCs.end());
    bufferedPCs.emplace(vaddr, reserved_pc->second);

    if (readPtr == bufEnd) {
        readPtr = bufStart;
    }

    reserved_pc->second = nullptr;
    reservedPCs.erase(reserved_pc);
}

bool
FetchUnit::FetchBufDesc::hasFetchDataToProcess() const
{
    return fetchBytesRemaining() >= sizeof(TheGpuISA::RawMachInst);
}

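/**
 * free the oldest buffered line once the wave has moved past it.
 * bufferedPCs is a std::map keyed by line address, so for sequential
 * fetch its begin() entry is the lowest, i.e. oldest, line; if the
 * wave's current PC falls in a newer line, the oldest entry can be
 * recycled onto the free list.
 */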
void
FetchUnit::FetchBufDesc::checkWaveReleaseBuf()
{
    Addr cur_wave_pc = roundDown(wavefront->pc(),
                                 wavefront->computeUnit->cacheLineSize());
    if (reservedPCs.find(cur_wave_pc) != reservedPCs.end()) {
        DPRINTF(GPUFetch, "WF[%d][%d]: Id%d current wave PC(%#x) still "
                "being fetched.\n", wavefront->simdId, wavefront->wfSlotId,
                wavefront->wfDynId, cur_wave_pc);

        // should be reserved, but not buffered yet
        assert(bufferedPCs.find(cur_wave_pc) == bufferedPCs.end());

        return;
    }

    auto current_buffered_pc = bufferedPCs.find(cur_wave_pc);
    auto oldest_buffered_pc = bufferedPCs.begin();

    DPRINTF(GPUFetch, "WF[%d][%d]: Id%d checking if PC block addr = %#x "
            "(PC = %#x) can be released.\n", wavefront->simdId,
            wavefront->wfSlotId, wavefront->wfDynId, cur_wave_pc,
            wavefront->pc());

#ifdef DEBUG
    int idx = 0;
    for (const auto &buf_pc : bufferedPCs) {
        DPRINTF(GPUFetch, "PC[%d] = %#x\n", idx, buf_pc.first);
        ++idx;
    }
#endif

    // if we haven't buffered data for this PC, we shouldn't
    // be fetching from it.
    assert(current_buffered_pc != bufferedPCs.end());

    /**
     * we're using a std::map so the addresses are sorted. if this
     * PC is not the oldest one in the map, we must be fetching from
     * a newer block, and we can release the oldest PC's fetch buffer
     * entry back to the free list.
     */
    if (current_buffered_pc != oldest_buffered_pc) {
        DPRINTF(GPUFetch, "WF[%d][%d]: Id%d done fetching for PC = %#x, "
                "removing it from the fetch buffer.\n", wavefront->simdId,
                wavefront->wfSlotId, wavefront->wfDynId,
                oldest_buffered_pc->first);

        freeList.emplace_back(oldest_buffered_pc->second);
        oldest_buffered_pc->second = nullptr;
        bufferedPCs.erase(oldest_buffered_pc);
        DPRINTF(GPUFetch, "WF[%d][%d]: Id%d has %d lines buffered.\n",
                wavefront->simdId, wavefront->wfSlotId, wavefront->wfDynId,
                bufferedLines());
    }
}

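/**
 * drain buffered fetch data into the wavefront's instruction buffer,
 * one instruction per iteration, until the IB is full (maxIbSize) or
 * fewer than sizeof(RawMachInst) valid bytes remain. instructions
 * whose encodings straddle the end of the circular buffer take the
 * decodeSplitInst() path instead of the direct cast of readPtr.
 */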
void
FetchUnit::FetchBufDesc::decodeInsts()
{
    assert(readPtr);

    if (splitDecode()) {
        decodeSplitInst();
    }

    while (wavefront->instructionBuffer.size() < maxIbSize
           && hasFetchDataToProcess()) {
        if (splitDecode()) {
            decodeSplitInst();
        } else {
            TheGpuISA::MachInst mach_inst
                = reinterpret_cast<TheGpuISA::MachInst>(readPtr);
            GPUStaticInst *gpu_static_inst = _decoder->decode(mach_inst);
            readPtr += gpu_static_inst->instSize();

            assert(readPtr <= bufEnd);

            GPUDynInstPtr gpu_dyn_inst
                = std::make_shared<GPUDynInst>(wavefront->computeUnit,
                                               wavefront, gpu_static_inst,
                                               wavefront->computeUnit->
                                                   getAndIncSeqNum());
            wavefront->instructionBuffer.push_back(gpu_dyn_inst);

            DPRINTF(GPUFetch, "WF[%d][%d]: Id%ld decoded %s (%d bytes). "
                    "%d bytes remain.\n", wavefront->simdId,
                    wavefront->wfSlotId, wavefront->wfDynId,
                    gpu_static_inst->disassemble(),
                    gpu_static_inst->instSize(),
                    fetchBytesRemaining());
        }
    }
}

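/**
 * reassemble a raw instruction whose encoding wraps around the end of
 * the fetch buffer, copying it dword by dword into a local
 * RawMachInst. note that the wrap check inside the loop doubles as
 * the read-pointer advance: with 64-bit raw insts and 32-bit dwords,
 * the split leaves one dword on each side of the wrap point, so the
 * reset to bufStart positions the second read. afterwards, readPtr is
 * advanced past whatever portion of the decoded instruction landed at
 * bufStart.
 */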
void
FetchUnit::FetchBufDesc::decodeSplitInst()
{
    TheGpuISA::RawMachInst split_inst = 0;
    int dword_size = sizeof(uint32_t);
    int num_dwords = sizeof(TheGpuISA::RawMachInst) / dword_size;

    for (int i = 0; i < num_dwords; ++i) {
        ((uint32_t*)(&split_inst))[i] = *reinterpret_cast<uint32_t*>(readPtr);
        if (readPtr + dword_size >= bufEnd) {
            readPtr = bufStart;
        }
    }

    assert(readPtr == bufStart);

    TheGpuISA::MachInst mach_inst
        = reinterpret_cast<TheGpuISA::MachInst>(&split_inst);
    GPUStaticInst *gpu_static_inst = _decoder->decode(mach_inst);
    readPtr += (gpu_static_inst->instSize() - dword_size);
    assert(readPtr < bufEnd);

    GPUDynInstPtr gpu_dyn_inst
        = std::make_shared<GPUDynInst>(wavefront->computeUnit,
                                       wavefront, gpu_static_inst,
                                       wavefront->computeUnit->
                                           getAndIncSeqNum());
    wavefront->instructionBuffer.push_back(gpu_dyn_inst);

    DPRINTF(GPUFetch, "WF[%d][%d]: Id%d decoded split inst %s (%#x) "
            "(%d bytes). %d bytes remain in %d buffered lines.\n",
            wavefront->simdId, wavefront->wfSlotId, wavefront->wfDynId,
            gpu_static_inst->disassemble(), split_inst,
            gpu_static_inst->instSize(), fetchBytesRemaining(),
            bufferedLines());
}

bool
FetchUnit::FetchBufDesc::splitDecode() const
{
    /**
     * if a read of a raw instruction would go beyond the end
     * of the fetch buffer, then we must perform a split decode.
     */
    bool is_split = (readPtr + sizeof(TheGpuISA::RawMachInst)) > bufEnd;

    return is_split;
}

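/**
 * count the valid bytes between readPtr and the end of the newest
 * buffered line, accounting for wrap-around: when that line's slot
 * ends behind readPtr in the underlying array, the count is the total
 * buffered bytes plus the (negative) distance from readPtr to the
 * slot's end.
 */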
int
FetchUnit::FetchBufDesc::fetchBytesRemaining() const
{
    int bytes_remaining = 0;

    if (bufferedLines() && readPtr != bufEnd) {
        auto last_buf_pc = bufferedPCs.rbegin();
        uint8_t *end_ptr = last_buf_pc->second + cacheLineSize;
        int byte_diff = end_ptr - readPtr;

        if (end_ptr > readPtr) {
            bytes_remaining = byte_diff;
        } else if (end_ptr < readPtr) {
            bytes_remaining = bufferedBytes() + byte_diff;
        }
    }

    assert(bytes_remaining <= bufferedBytes());
    return bytes_remaining;
}