req_tick_latency(p->mem_req_latency * p->clk_domain->clockPeriod()),
resp_tick_latency(p->mem_resp_latency * p->clk_domain->clockPeriod()),
_masterId(p->system->getMasterId(name() + ".ComputeUnit")),
- lds(*p->localDataStore), globalSeqNum(0), wavefrontSize(p->wfSize),
+ lds(*p->localDataStore), _cacheLineSize(p->system->cacheLineSize()),
+ globalSeqNum(0), wavefrontSize(p->wfSize),
kernelLaunchInst(new KernelLaunchStaticInst())
{
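// Illustrative sketch (not part of the patch; names are stand-ins): the
// initializer list above caches the system-wide line size once, so the CU
// never reaches into RubySystem and behaves the same under the classic
// memory system. The shape of the pattern:
class LineSized
{
  public:
    explicit LineSized(int line_size) : _cacheLineSize(line_size) {}
    // Read-only accessor; the value is fixed for the object's lifetime.
    int cacheLineSize() const { return _cacheLineSize; }
  private:
    const int _cacheLineSize;
};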
int32_t
getRefCounter(const uint32_t dispatchId, const uint32_t wgId) const;
+ int cacheLineSize() const { return _cacheLineSize; }
+
bool
sendToLds(GPUDynInstPtr gpuDynInst) __attribute__((warn_unused_result));
uint64_t getAndIncSeqNum() { return globalSeqNum++; }
private:
+ const int _cacheLineSize;
uint64_t globalSeqNum;
int wavefrontSize;
GPUStaticInst *kernelLaunchInst;
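// Illustrative sketch (hypothetical functions, not from the patch): the
// warn_unused_result attribute on sendToLds() above makes the compiler warn
// whenever a caller silently drops the returned bool, e.g.:
__attribute__((warn_unused_result))
static bool
trySend(bool port_ready)
{
    return port_ready; // stand-in for queueing a packet on the LDS port
}

static void
sendOrRetry(bool port_ready)
{
    if (!trySend(port_ready)) {
        // hypothetical recovery: stall and retry on a later cycle
    }
}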
// Since this is an instruction prefetch, if the access splits across a
// line boundary just finish out the current line.
- unsigned block_size = RubySystem::getBlockSizeBytes();
+ int block_size = computeUnit->cacheLineSize();
// check for split accesses
Addr split_addr = roundDown(vaddr + block_size - 1, block_size);
- unsigned size = block_size;
+ int size = block_size;
if (split_addr > vaddr) {
// misaligned access, just grab the rest of the line
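// Worked sketch (assuming 64-byte lines): for vaddr = 0x1038,
// split_addr = roundDown(0x1038 + 0x3F, 0x40) = 0x1040 > vaddr, so only the
// split_addr - vaddr = 8 bytes left in the line are fetched; an aligned
// vaddr = 0x1040 gives split_addr == vaddr and a full-line fetch. The same
// check as a standalone helper (hypothetical name):
static unsigned
bytesToFetch(Addr vaddr, unsigned block_size)
{
    Addr split_addr = roundDown(vaddr + block_size - 1, block_size);
    return split_addr > vaddr ? split_addr - vaddr : block_size;
}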
Shader::doFunctionalAccess(RequestPtr req, MemCmd cmd, void *data,
bool suppress_func_errors, int cu_id)
{
- unsigned block_size = RubySystem::getBlockSizeBytes();
+ int block_size = cuList.at(cu_id)->cacheLineSize();
unsigned size = req->getSize();
Addr tmp_addr;
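// Sketch of the boundary test this function relies on (assumed shape,
// hypothetical name): a functional access straddles two lines exactly when
// its first and last bytes round down to different line addresses. With
// 64B lines, vaddr = 0x103C and size = 8 touch 0x1000 and 0x1040, so the
// request must be split in two.
static bool
spansTwoLines(Addr vaddr, unsigned size, int block_size)
{
    return roundDown(vaddr, block_size) !=
        roundDown(vaddr + size - 1, block_size);
}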
{
uint8_t *data_buf = (uint8_t*)ptr;
- for (ChunkGenerator gen(address, size, RubySystem::getBlockSizeBytes());
+ for (ChunkGenerator gen(address, size, cuList.at(cu_id)->cacheLineSize());
!gen.done(); gen.next()) {
Request *req = new Request(0, gen.addr(), gen.size(), 0,
cuList[0]->masterId(), 0, 0, 0);
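// Illustrative trace (hypothetical driver; assumes the GPUMem debug flag):
// ChunkGenerator carves the range into line-sized pieces, clipping the
// first chunk at a line boundary. With 64B lines, address = 0x1030 and
// size = 0x50 yield chunks (0x1030, 0x10) and (0x1040, 0x40).
static void
traceChunks(Addr address, unsigned size, unsigned line_size)
{
    for (ChunkGenerator gen(address, size, line_size);
         !gen.done(); gen.next()) {
        DPRINTF(GPUMem, "chunk addr %#x size %d\n", gen.addr(), gen.size());
    }
}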