resp_tick_latency(p->mem_resp_latency * p->clk_domain->clockPeriod()),
_masterId(p->system->getMasterId(this, "ComputeUnit")),
lds(*p->localDataStore), gmTokenPort(name() + ".gmTokenPort", this),
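+ // These five ports are now value members rather than heap-allocated in
+ // getPort(), so they are constructed up front in the initializer list.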
+ ldsPort(csprintf("%s-port", name()), this),
+ scalarDataPort(csprintf("%s-port", name()), this),
+ scalarDTLBPort(csprintf("%s-port", name()), this),
+ sqcPort(csprintf("%s-port", name()), this),
+ sqcTLBPort(csprintf("%s-port", name()), this),
_cacheLineSize(p->system->cacheLineSize()),
_numBarrierSlots(p->num_barrier_slots),
globalSeqNum(0), wavefrontSize(p->wf_size),
fatal("Invalid WF execution policy (CU)\n");
}
- memPort.resize(wfSize());
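+ // Build one port per element connected to the "memory_port" (and, below,
+ // "translation_port") vector port; the port_*_connection_count params are
+ // generated from the Python config.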
+ for (int i = 0; i < p->port_memory_port_connection_count; ++i) {
+ memPort.emplace_back(csprintf("%s-port%d", name(), i), this, i);
+ }
+
+ for (int i = 0; i < p->port_translation_port_connection_count; ++i) {
+ tlbPort.emplace_back(csprintf("%s-port%d", name(), i), this, i);
+ }
// Set up tokens for slave ports. The number of tokens in memPortTokens
// is the total token count for the entire vector port (i.e., this CU).
memPortTokens = new TokenManager(p->max_cu_tokens);
- // resize the tlbPort vectorArray
- int tlbPort_width = perLaneTLB ? wfSize() : 1;
- tlbPort.resize(tlbPort_width);
-
registerExitCallback([this]() { exitCallback(); });
lastExecCycle.resize(numVectorALUs, 0);
lastVaddrSimd[j].clear();
}
lastVaddrCU.clear();
- delete ldsPort;
}
int
// appropriate cycle to process the timing memory response
// This delay represents the pipeline delay
SenderState *sender_state = safe_cast<SenderState*>(pkt->senderState);
- int index = sender_state->port_index;
+ PortID index = sender_state->port_index;
GPUDynInstPtr gpuDynInst = sender_state->_gpuDynInst;
GPUDispatcher &dispatcher = computeUnit->shader->dispatcher();
}
EventFunctionWrapper *mem_resp_event =
- computeUnit->memPort[index]->createMemRespEvent(pkt);
+ computeUnit->memPort[index].createMemRespEvent(pkt);
DPRINTF(GPUPort,
"CU%d: WF[%d][%d]: gpuDynInst: %d, index %d, addr %#x received!\n",
}
void
-ComputeUnit::sendRequest(GPUDynInstPtr gpuDynInst, int index, PacketPtr pkt)
+ComputeUnit::sendRequest(GPUDynInstPtr gpuDynInst, PortID index, PacketPtr pkt)
{
// There must be a way around this check to do the globalMemStart...
Addr tmp_vaddr = pkt->req->getVaddr();
tlbCycles -= curTick();
++tlbRequests;
- int tlbPort_index = perLaneTLB ? index : 0;
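+ // With per-lane TLBs, lane i maps to tlbPort[i]; otherwise all lanes
+ // share the single DTLBPort at index 0.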
+ PortID tlbPort_index = perLaneTLB ? index : 0;
if (shader->timingSim) {
if (debugSegFault) {
pkt->senderState = translation_state;
if (functionalTLB) {
- tlbPort[tlbPort_index]->sendFunctional(pkt);
+ tlbPort[tlbPort_index].sendFunctional(pkt);
// update the hitLevel distribution
int hit_level = translation_state->hitLevel;
// translation is done. Schedule the mem_req_event at the
// appropriate cycle to send the timing memory request to ruby
EventFunctionWrapper *mem_req_event =
- memPort[index]->createMemReqEvent(pkt);
+ memPort[index].createMemReqEvent(pkt);
DPRINTF(GPUPort, "CU%d: WF[%d][%d]: index %d, addr %#x data "
"scheduled\n", cu_id, gpuDynInst->simdId,
gpuDynInst->wfSlotId, index, pkt->req->getPaddr());
schedule(mem_req_event, curTick() + req_tick_latency);
- } else if (tlbPort[tlbPort_index]->isStalled()) {
- assert(tlbPort[tlbPort_index]->retries.size() > 0);
+ } else if (tlbPort[tlbPort_index].isStalled()) {
+ assert(tlbPort[tlbPort_index].retries.size() > 0);
DPRINTF(GPUTLB, "CU%d: WF[%d][%d]: Translation for addr %#x "
"failed!\n", cu_id, gpuDynInst->simdId,
gpuDynInst->wfSlotId, tmp_vaddr);
- tlbPort[tlbPort_index]->retries.push_back(pkt);
- } else if (!tlbPort[tlbPort_index]->sendTimingReq(pkt)) {
+ tlbPort[tlbPort_index].retries.push_back(pkt);
+ } else if (!tlbPort[tlbPort_index].sendTimingReq(pkt)) {
// Stall the TLB port;
// no more packets will be issued until
// Ruby indicates resources are freed by
// a recvReqRetry() callback on this port.
- tlbPort[tlbPort_index]->stallPort();
+ tlbPort[tlbPort_index].stallPort();
DPRINTF(GPUTLB, "CU%d: WF[%d][%d]: Translation for addr %#x "
"failed!\n", cu_id, gpuDynInst->simdId,
gpuDynInst->wfSlotId, tmp_vaddr);
- tlbPort[tlbPort_index]->retries.push_back(pkt);
+ tlbPort[tlbPort_index].retries.push_back(pkt);
} else {
DPRINTF(GPUTLB,
"CU%d: WF[%d][%d]: Translation for addr %#x sent!\n",
pkt->senderState = new TheISA::GpuTLB::TranslationState(TLB_mode,
shader->gpuTc);
- tlbPort[tlbPort_index]->sendFunctional(pkt);
+ tlbPort[tlbPort_index].sendFunctional(pkt);
// the addr of the packet is not modified, so we need to create a new
// packet, or otherwise the memory access will use the old virtual address
new_pkt->dataStatic(pkt->getPtr<uint8_t>());
// Translation is done. It is safe to send the packet to memory.
- memPort[0]->sendFunctional(new_pkt);
+ memPort[0].sendFunctional(new_pkt);
DPRINTF(GPUMem, "Functional sendRequest\n");
DPRINTF(GPUMem, "CU%d: WF[%d][%d]: index %d: addr %#x\n", cu_id,
new TheISA::GpuTLB::TranslationState(tlb_mode, shader->gpuTc, false,
pkt->senderState);
- if (scalarDTLBPort->isStalled()) {
- assert(scalarDTLBPort->retries.size());
- scalarDTLBPort->retries.push_back(pkt);
- } else if (!scalarDTLBPort->sendTimingReq(pkt)) {
- scalarDTLBPort->stallPort();
- scalarDTLBPort->retries.push_back(pkt);
+ if (scalarDTLBPort.isStalled()) {
+ assert(scalarDTLBPort.retries.size());
+ scalarDTLBPort.retries.push_back(pkt);
+ } else if (!scalarDTLBPort.sendTimingReq(pkt)) {
+ scalarDTLBPort.stallPort();
+ scalarDTLBPort.retries.push_back(pkt);
} else {
DPRINTF(GPUTLB, "sent scalar %s translation request for addr %#x\n",
tlb_mode == BaseTLB::Read ? "read" : "write",
new ComputeUnit::DataPort::SenderState(gpuDynInst, 0, nullptr));
EventFunctionWrapper *mem_req_event =
- memPort[0]->createMemReqEvent(pkt);
+ memPort[0].createMemReqEvent(pkt);
DPRINTF(GPUPort, "CU%d: WF[%d][%d]: index %d, addr %#x scheduling "
"an acquire\n", cu_id, gpuDynInst->simdId,
new ComputeUnit::DataPort::SenderState(gpuDynInst, 0, nullptr));
EventFunctionWrapper *mem_req_event =
- memPort[0]->createMemReqEvent(pkt);
+ memPort[0].createMemReqEvent(pkt);
DPRINTF(GPUPort, "CU%d: WF[%d][%d]: index %d, addr %#x scheduling "
"a release\n", cu_id, gpuDynInst->simdId,
new ComputeUnit::DataPort::SenderState(gpuDynInst, 0, nullptr));
EventFunctionWrapper *mem_req_event =
- memPort[0]->createMemReqEvent(pkt);
+ memPort[0].createMemReqEvent(pkt);
DPRINTF(GPUPort,
"CU%d: WF[%d][%d]: index %d, addr %#x sync scheduled\n",
DPRINTF(GPUPort, "CU%d: WF[%d][%d]: Response for addr %#x, index %d\n",
compute_unit->cu_id, gpuDynInst->simdId, gpuDynInst->wfSlotId,
- pkt->req->getPaddr(), index);
+ pkt->req->getPaddr(), id);
Addr paddr = pkt->req->getPaddr();
int index = gpuDynInst->memStatusVector[paddr].back();
DPRINTF(GPUMem, "Response for addr %#x, index %d\n",
pkt->req->getPaddr(), index);
gpuDynInst->memStatusVector[paddr].pop_back();
gpuDynInst->pAddr = pkt->req->getPaddr();
safe_cast<DTLBPort::SenderState*>(pkt->senderState);
GPUDynInstPtr gpuDynInst = sender_state->_gpuDynInst;
- int mp_index = sender_state->portIndex;
+ PortID mp_index = sender_state->portIndex;
Addr vaddr = pkt->req->getVaddr();
gpuDynInst->memStatusVector[line].push_back(mp_index);
gpuDynInst->tlbHitLevel[mp_index] = hit_level;
// translation is done. Schedule the mem_req_event at the appropriate
// cycle to send the timing memory request to ruby
EventFunctionWrapper *mem_req_event =
- computeUnit->memPort[mp_index]->createMemReqEvent(new_pkt);
+ computeUnit->memPort[mp_index].createMemReqEvent(new_pkt);
DPRINTF(GPUPort, "CU%d: WF[%d][%d]: index %d, addr %#x data scheduled\n",
computeUnit->cu_id, gpuDynInst->simdId,
DPRINTF(GPUPort,
"CU%d: WF[%d][%d]: index %d, addr %#x data req failed!\n",
- compute_unit->cu_id, gpuDynInst->simdId,
- gpuDynInst->wfSlotId, index,
- pkt->req->getPaddr());
+ compute_unit->cu_id, gpuDynInst->simdId, gpuDynInst->wfSlotId,
+ id, pkt->req->getPaddr());
} else {
DPRINTF(GPUPort,
"CU%d: WF[%d][%d]: gpuDynInst: %d, index %d, addr %#x data "
"req sent!\n", compute_unit->cu_id, gpuDynInst->simdId,
- gpuDynInst->wfSlotId, gpuDynInst->seqNum(), index,
+ gpuDynInst->wfSlotId, gpuDynInst->seqNum(), id,
pkt->req->getPaddr());
}
}
{
SenderState *sender_state = safe_cast<SenderState*>(pkt->senderState);
GPUDynInstPtr gpuDynInst = sender_state->_gpuDynInst;
- ComputeUnit *compute_unit M5_VAR_USED = scalarDataPort->computeUnit;
+ ComputeUnit *compute_unit M5_VAR_USED = scalarDataPort.computeUnit;
- if (!(scalarDataPort->sendTimingReq(pkt))) {
- scalarDataPort->retries.push_back(pkt);
+ if (!(scalarDataPort.sendTimingReq(pkt))) {
+ scalarDataPort.retries.push_back(pkt);
DPRINTF(GPUPort,
- "CU%d: WF[%d][%d]: index %d, addr %#x data req failed!\n",
+ "CU%d: WF[%d][%d]: addr %#x data req failed!\n",
compute_unit->cu_id, gpuDynInst->simdId,
- gpuDynInst->wfSlotId, scalarDataPort->index,
- pkt->req->getPaddr());
+ gpuDynInst->wfSlotId, pkt->req->getPaddr());
} else {
DPRINTF(GPUPort,
- "CU%d: WF[%d][%d]: gpuDynInst: %d, index %d, addr %#x data "
+ "CU%d: WF[%d][%d]: gpuDynInst: %d, addr %#x data "
"req sent!\n", compute_unit->cu_id, gpuDynInst->simdId,
gpuDynInst->wfSlotId, gpuDynInst->seqNum(),
- scalarDataPort->index, pkt->req->getPaddr());
+ pkt->req->getPaddr());
}
}
req_pkt->senderState =
new ComputeUnit::ScalarDataPort::SenderState(gpuDynInst);
- if (!computeUnit->scalarDataPort->sendTimingReq(req_pkt)) {
- computeUnit->scalarDataPort->retries.push_back(req_pkt);
+ if (!computeUnit->scalarDataPort.sendTimingReq(req_pkt)) {
+ computeUnit->scalarDataPort.retries.push_back(req_pkt);
DPRINTF(GPUMem, "send scalar req failed for: %s\n",
gpuDynInst->disassemble());
} else {
// This is the SenderState needed upon return
newPacket->senderState = new LDSPort::SenderState(gpuDynInst);
- return ldsPort->sendTimingReq(newPacket);
+ return ldsPort.sendTimingReq(newPacket);
}
/**
void doSmReturn(GPUDynInstPtr gpuDynInst);
virtual void init() override;
- void sendRequest(GPUDynInstPtr gpuDynInst, int index, PacketPtr pkt);
+ void sendRequest(GPUDynInstPtr gpuDynInst, PortID index, PacketPtr pkt);
void sendScalarRequest(GPUDynInstPtr gpuDynInst, PacketPtr pkt);
void injectGlobalMemFence(GPUDynInstPtr gpuDynInst,
bool kernelMemSync,
class DataPort : public RequestPort
{
public:
- DataPort(const std::string &_name, ComputeUnit *_cu, PortID _index)
- : RequestPort(_name, _cu), computeUnit(_cu),
- index(_index) { }
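+ // The lane index is now passed to (and stored by) the Port base class,
+ // replacing the duplicate index member that used to live here.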
+ DataPort(const std::string &_name, ComputeUnit *_cu, PortID id)
+ : RequestPort(_name, _cu, id), computeUnit(_cu) { }
bool snoopRangeSent;
struct SenderState : public Packet::SenderState
{
GPUDynInstPtr _gpuDynInst;
- int port_index;
+ PortID port_index;
Packet::SenderState *saved;
SenderState(GPUDynInstPtr gpuDynInst, PortID _port_index,
protected:
ComputeUnit *computeUnit;
- int index;
virtual bool recvTimingResp(PacketPtr pkt);
virtual Tick recvAtomic(PacketPtr pkt) { return 0; }
class ScalarDataPort : public RequestPort
{
public:
- ScalarDataPort(const std::string &_name, ComputeUnit *_cu,
- PortID _index)
- : RequestPort(_name, _cu, _index), computeUnit(_cu), index(_index)
+ ScalarDataPort(const std::string &_name, ComputeUnit *_cu)
+ : RequestPort(_name, _cu), computeUnit(_cu)
{
- (void)index;
}
bool recvTimingResp(PacketPtr pkt) override;
class MemReqEvent : public Event
{
private:
- ScalarDataPort *scalarDataPort;
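+ // Held by reference: the port is a value member of the CU, so it
+ // outlives this AutoDelete event.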
+ ScalarDataPort &scalarDataPort;
PacketPtr pkt;
public:
- MemReqEvent(ScalarDataPort *_scalar_data_port, PacketPtr _pkt)
+ MemReqEvent(ScalarDataPort &_scalar_data_port, PacketPtr _pkt)
: Event(), scalarDataPort(_scalar_data_port), pkt(_pkt)
{
setFlags(Event::AutoDelete);
private:
ComputeUnit *computeUnit;
- PortID index;
};
// Instruction cache access port
class SQCPort : public RequestPort
{
public:
- SQCPort(const std::string &_name, ComputeUnit *_cu, PortID _index)
- : RequestPort(_name, _cu), computeUnit(_cu),
- index(_index) { }
+ SQCPort(const std::string &_name, ComputeUnit *_cu)
+ : RequestPort(_name, _cu), computeUnit(_cu) { }
bool snoopRangeSent;
protected:
ComputeUnit *computeUnit;
- int index;
virtual bool recvTimingResp(PacketPtr pkt);
virtual Tick recvAtomic(PacketPtr pkt) { return 0; }
class DTLBPort : public RequestPort
{
public:
- DTLBPort(const std::string &_name, ComputeUnit *_cu, PortID _index)
- : RequestPort(_name, _cu), computeUnit(_cu),
- index(_index), stalled(false)
+ DTLBPort(const std::string &_name, ComputeUnit *_cu, PortID id)
+ : RequestPort(_name, _cu, id), computeUnit(_cu),
+ stalled(false)
{ }
bool isStalled() { return stalled; }
// the lane in the memInst this sender state is associated with, so we
// send the memory request down the right port
- int portIndex;
+ PortID portIndex;
// constructor used for packets involved in timing accesses
SenderState(GPUDynInstPtr gpuDynInst, PortID port_index)
protected:
ComputeUnit *computeUnit;
- int index;
bool stalled;
virtual bool recvTimingResp(PacketPtr pkt);
class LDSPort : public RequestPort
{
public:
- LDSPort(const std::string &_name, ComputeUnit *_cu, PortID _id)
- : RequestPort(_name, _cu, _id), computeUnit(_cu)
+ LDSPort(const std::string &_name, ComputeUnit *_cu)
+ : RequestPort(_name, _cu), computeUnit(_cu)
{
}
/** The port to access the Local Data Store
* Can be connected to an LDS object
*/
- LDSPort *ldsPort = nullptr;
-
- LDSPort *
- getLdsPort() const
- {
- return ldsPort;
- }
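+ // Held by value now, and public, so the getLdsPort() accessor above is
+ // no longer needed.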
+ LDSPort ldsPort;
TokenManager *
getTokenManager()
/** The memory port for SIMD data accesses.
* Can be connected to PhysMem or Ruby for timing simulations
*/
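+ // All of the following ports are now held by value; they are constructed
+ // in the ComputeUnit constructor and handed out by getPort() below.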
- std::vector<DataPort*> memPort;
+ std::vector<DataPort> memPort;
// port to the TLB hierarchy (i.e., the L1 TLB)
- std::vector<DTLBPort*> tlbPort;
+ std::vector<DTLBPort> tlbPort;
// port to the scalar data cache
- ScalarDataPort *scalarDataPort;
+ ScalarDataPort scalarDataPort;
// port to the scalar data TLB
- ScalarDTLBPort *scalarDTLBPort;
+ ScalarDTLBPort scalarDTLBPort;
// port to the SQC (i.e. the I-cache)
- SQCPort *sqcPort;
+ SQCPort sqcPort;
// port to the SQC TLB (there's a separate TLB for each I-cache)
- ITLBPort *sqcTLBPort;
+ ITLBPort sqcTLBPort;
Port &
getPort(const std::string &if_name, PortID idx) override
{
- if (if_name == "memory_port") {
- memPort[idx] = new DataPort(csprintf("%s-port%d", name(), idx),
- this, idx);
- return *memPort[idx];
- } else if (if_name == "translation_port") {
- tlbPort[idx] = new DTLBPort(csprintf("%s-port%d", name(), idx),
- this, idx);
- return *tlbPort[idx];
+ if (if_name == "memory_port" && idx < memPort.size()) {
+ return memPort[idx];
+ } else if (if_name == "translation_port" && idx < tlbPort.size()) {
+ return tlbPort[idx];
} else if (if_name == "scalar_port") {
- scalarDataPort = new ScalarDataPort(csprintf("%s-port%d", name(),
- idx), this, idx);
- return *scalarDataPort;
+ return scalarDataPort;
} else if (if_name == "scalar_tlb_port") {
- scalarDTLBPort = new ScalarDTLBPort(csprintf("%s-port", name()),
- this);
- return *scalarDTLBPort;
+ return scalarDTLBPort;
} else if (if_name == "sqc_port") {
- sqcPort = new SQCPort(csprintf("%s-port%d", name(), idx),
- this, idx);
- return *sqcPort;
+ return sqcPort;
} else if (if_name == "sqc_tlb_port") {
- sqcTLBPort = new ITLBPort(csprintf("%s-port", name()), this);
- return *sqcTLBPort;
+ return sqcTLBPort;
} else if (if_name == "ldsPort") {
- if (ldsPort) {
- fatal("an LDS port was already allocated");
- }
- ldsPort = new LDSPort(csprintf("%s-port", name()), this, idx);
- return *ldsPort;
+ return ldsPort;
} else if (if_name == "gmTokenPort") {
return gmTokenPort;
} else {
- panic("incorrect port name");
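+ // Unknown port names now fall through to the base class, which
+ // reports the bad name itself.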
+ return ClockedObject::getPort(if_name, idx);
}
}