resp_tick_latency(p->mem_resp_latency * p->clk_domain->clockPeriod()),
_masterId(p->system->getMasterId(this, "ComputeUnit")),
lds(*p->localDataStore), gmTokenPort(name() + ".gmTokenPort", this),
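+ // These five ports are now value members rather than heap-allocated in
+ // getPort(), so they are constructed up front in the initializer list.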
+ ldsPort(csprintf("%s-port", name()), this),
+ scalarDataPort(csprintf("%s-port", name()), this),
+ scalarDTLBPort(csprintf("%s-port", name()), this),
+ sqcPort(csprintf("%s-port", name()), this),
+ sqcTLBPort(csprintf("%s-port", name()), this),
_cacheLineSize(p->system->cacheLineSize()),
_numBarrierSlots(p->num_barrier_slots),
globalSeqNum(0), wavefrontSize(p->wf_size),
fatal("Invalid WF execution policy (CU)\n");
}
- memPort.resize(wfSize());
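+ // Build one port per element connected to the "memory_port" (and, below,
+ // "translation_port") vector port; the port_*_connection_count params are
+ // generated from the Python config.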
+ for (int i = 0; i < p->port_memory_port_connection_count; ++i) {
+ memPort.emplace_back(csprintf("%s-port%d", name(), i), this, i);
+ }
+
+ for (int i = 0; i < p->port_translation_port_connection_count; ++i) {
+ tlbPort.emplace_back(csprintf("%s-port%d", name(), i), this, i);
+ }
// Set up tokens for slave ports. The number of tokens in memPortTokens
// is the total token count for the entire vector port (i.e., this CU).
memPortTokens = new TokenManager(p->max_cu_tokens);
- // resize the tlbPort vectorArray
- int tlbPort_width = perLaneTLB ? wfSize() : 1;
- tlbPort.resize(tlbPort_width);
-
registerExitCallback([this]() { exitCallback(); });
lastExecCycle.resize(numVectorALUs, 0);
lastVaddrSimd[j].clear();
}
lastVaddrCU.clear();
- delete ldsPort;
}
int
// appropriate cycle to process the timing memory response
// This delay represents the pipeline delay
SenderState *sender_state = safe_cast<SenderState*>(pkt->senderState);
- int index = sender_state->port_index;
+ PortID index = sender_state->port_index;
GPUDynInstPtr gpuDynInst = sender_state->_gpuDynInst;
GPUDispatcher &dispatcher = computeUnit->shader->dispatcher();
}
EventFunctionWrapper *mem_resp_event =
- computeUnit->memPort[index]->createMemRespEvent(pkt);
+ computeUnit->memPort[index].createMemRespEvent(pkt);
DPRINTF(GPUPort,
"CU%d: WF[%d][%d]: gpuDynInst: %d, index %d, addr %#x received!\n",
}
void
-ComputeUnit::sendRequest(GPUDynInstPtr gpuDynInst, int index, PacketPtr pkt)
+ComputeUnit::sendRequest(GPUDynInstPtr gpuDynInst, PortID index, PacketPtr pkt)
{
// There must be a way around this check to do the globalMemStart...
Addr tmp_vaddr = pkt->req->getVaddr();
tlbCycles -= curTick();
++tlbRequests;
- int tlbPort_index = perLaneTLB ? index : 0;
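+ // With per-lane TLBs, lane i maps to tlbPort[i]; otherwise all lanes
+ // share the single DTLBPort at index 0.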
+ PortID tlbPort_index = perLaneTLB ? index : 0;
if (shader->timingSim) {
if (debugSegFault) {
pkt->senderState = translation_state;
if (functionalTLB) {
- tlbPort[tlbPort_index]->sendFunctional(pkt);
+ tlbPort[tlbPort_index].sendFunctional(pkt);
// update the hitLevel distribution
int hit_level = translation_state->hitLevel;
// translation is done. Schedule the mem_req_event at the
// appropriate cycle to send the timing memory request to ruby
EventFunctionWrapper *mem_req_event =
- memPort[index]->createMemReqEvent(pkt);
+ memPort[index].createMemReqEvent(pkt);
DPRINTF(GPUPort, "CU%d: WF[%d][%d]: index %d, addr %#x data "
"scheduled\n", cu_id, gpuDynInst->simdId,
gpuDynInst->wfSlotId, index, pkt->req->getPaddr());
schedule(mem_req_event, curTick() + req_tick_latency);
- } else if (tlbPort[tlbPort_index]->isStalled()) {
- assert(tlbPort[tlbPort_index]->retries.size() > 0);
+ } else if (tlbPort[tlbPort_index].isStalled()) {
+ assert(tlbPort[tlbPort_index].retries.size() > 0);
DPRINTF(GPUTLB, "CU%d: WF[%d][%d]: Translation for addr %#x "
"failed!\n", cu_id, gpuDynInst->simdId,
gpuDynInst->wfSlotId, tmp_vaddr);
- tlbPort[tlbPort_index]->retries.push_back(pkt);
- } else if (!tlbPort[tlbPort_index]->sendTimingReq(pkt)) {
+ tlbPort[tlbPort_index].retries.push_back(pkt);
+ } else if (!tlbPort[tlbPort_index].sendTimingReq(pkt)) {
// Stall the TLB port;
// no more packets will be issued until
// Ruby indicates resources are freed by
// a recvReqRetry() callback on this port.
- tlbPort[tlbPort_index]->stallPort();
+ tlbPort[tlbPort_index].stallPort();
DPRINTF(GPUTLB, "CU%d: WF[%d][%d]: Translation for addr %#x "
"failed!\n", cu_id, gpuDynInst->simdId,
gpuDynInst->wfSlotId, tmp_vaddr);
- tlbPort[tlbPort_index]->retries.push_back(pkt);
+ tlbPort[tlbPort_index].retries.push_back(pkt);
} else {
DPRINTF(GPUTLB,
"CU%d: WF[%d][%d]: Translation for addr %#x sent!\n",
pkt->senderState = new TheISA::GpuTLB::TranslationState(TLB_mode,
shader->gpuTc);
- tlbPort[tlbPort_index]->sendFunctional(pkt);
+ tlbPort[tlbPort_index].sendFunctional(pkt);
// the addr of the packet is not modified, so we need to create a new
// packet, or otherwise the memory access will use the old virtual address
new_pkt->dataStatic(pkt->getPtr<uint8_t>());
// Translation is done. It is safe to send the packet to memory.
- memPort[0]->sendFunctional(new_pkt);
+ memPort[0].sendFunctional(new_pkt);
DPRINTF(GPUMem, "Functional sendRequest\n");
DPRINTF(GPUMem, "CU%d: WF[%d][%d]: index %d: addr %#x\n", cu_id,
new TheISA::GpuTLB::TranslationState(tlb_mode, shader->gpuTc, false,
pkt->senderState);
- if (scalarDTLBPort->isStalled()) {
- assert(scalarDTLBPort->retries.size());
- scalarDTLBPort->retries.push_back(pkt);
- } else if (!scalarDTLBPort->sendTimingReq(pkt)) {
- scalarDTLBPort->stallPort();
- scalarDTLBPort->retries.push_back(pkt);
+ if (scalarDTLBPort.isStalled()) {
+ assert(scalarDTLBPort.retries.size());
+ scalarDTLBPort.retries.push_back(pkt);
+ } else if (!scalarDTLBPort.sendTimingReq(pkt)) {
+ scalarDTLBPort.stallPort();
+ scalarDTLBPort.retries.push_back(pkt);
} else {
DPRINTF(GPUTLB, "sent scalar %s translation request for addr %#x\n",
tlb_mode == BaseTLB::Read ? "read" : "write",
new ComputeUnit::DataPort::SenderState(gpuDynInst, 0, nullptr));
EventFunctionWrapper *mem_req_event =
- memPort[0]->createMemReqEvent(pkt);
+ memPort[0].createMemReqEvent(pkt);
DPRINTF(GPUPort, "CU%d: WF[%d][%d]: index %d, addr %#x scheduling "
"an acquire\n", cu_id, gpuDynInst->simdId,
new ComputeUnit::DataPort::SenderState(gpuDynInst, 0, nullptr));
EventFunctionWrapper *mem_req_event =
- memPort[0]->createMemReqEvent(pkt);
+ memPort[0].createMemReqEvent(pkt);
DPRINTF(GPUPort, "CU%d: WF[%d][%d]: index %d, addr %#x scheduling "
"a release\n", cu_id, gpuDynInst->simdId,
new ComputeUnit::DataPort::SenderState(gpuDynInst, 0, nullptr));
EventFunctionWrapper *mem_req_event =
- memPort[0]->createMemReqEvent(pkt);
+ memPort[0].createMemReqEvent(pkt);
DPRINTF(GPUPort,
"CU%d: WF[%d][%d]: index %d, addr %#x sync scheduled\n",
DPRINTF(GPUPort, "CU%d: WF[%d][%d]: Response for addr %#x, index %d\n",
compute_unit->cu_id, gpuDynInst->simdId, gpuDynInst->wfSlotId,
- pkt->req->getPaddr(), index);
+ pkt->req->getPaddr(), id);
Addr paddr = pkt->req->getPaddr();
int index = gpuDynInst->memStatusVector[paddr].back();
DPRINTF(GPUMem, "Response for addr %#x, index %d\n",
pkt->req->getPaddr(), index);
gpuDynInst->memStatusVector[paddr].pop_back();
gpuDynInst->pAddr = pkt->req->getPaddr();
safe_cast<DTLBPort::SenderState*>(pkt->senderState);
GPUDynInstPtr gpuDynInst = sender_state->_gpuDynInst;
- int mp_index = sender_state->portIndex;
+ PortID mp_index = sender_state->portIndex;
Addr vaddr = pkt->req->getVaddr();
gpuDynInst->memStatusVector[line].push_back(mp_index);
gpuDynInst->tlbHitLevel[mp_index] = hit_level;
// translation is done. Schedule the mem_req_event at the appropriate
// cycle to send the timing memory request to ruby
EventFunctionWrapper *mem_req_event =
- computeUnit->memPort[mp_index]->createMemReqEvent(new_pkt);
+ computeUnit->memPort[mp_index].createMemReqEvent(new_pkt);
DPRINTF(GPUPort, "CU%d: WF[%d][%d]: index %d, addr %#x data scheduled\n",
computeUnit->cu_id, gpuDynInst->simdId,
DPRINTF(GPUPort,
"CU%d: WF[%d][%d]: index %d, addr %#x data req failed!\n",
- compute_unit->cu_id, gpuDynInst->simdId,
- gpuDynInst->wfSlotId, index,
- pkt->req->getPaddr());
+ compute_unit->cu_id, gpuDynInst->simdId, gpuDynInst->wfSlotId,
+ id, pkt->req->getPaddr());
} else {
DPRINTF(GPUPort,
"CU%d: WF[%d][%d]: gpuDynInst: %d, index %d, addr %#x data "
"req sent!\n", compute_unit->cu_id, gpuDynInst->simdId,
- gpuDynInst->wfSlotId, gpuDynInst->seqNum(), index,
+ gpuDynInst->wfSlotId, gpuDynInst->seqNum(), id,
pkt->req->getPaddr());
}
}
{
SenderState *sender_state = safe_cast<SenderState*>(pkt->senderState);
GPUDynInstPtr gpuDynInst = sender_state->_gpuDynInst;
- ComputeUnit *compute_unit M5_VAR_USED = scalarDataPort->computeUnit;
+ ComputeUnit *compute_unit M5_VAR_USED = scalarDataPort.computeUnit;
- if (!(scalarDataPort->sendTimingReq(pkt))) {
- scalarDataPort->retries.push_back(pkt);
+ if (!(scalarDataPort.sendTimingReq(pkt))) {
+ scalarDataPort.retries.push_back(pkt);
DPRINTF(GPUPort,
- "CU%d: WF[%d][%d]: index %d, addr %#x data req failed!\n",
+ "CU%d: WF[%d][%d]: addr %#x data req failed!\n",
compute_unit->cu_id, gpuDynInst->simdId,
- gpuDynInst->wfSlotId, scalarDataPort->index,
- pkt->req->getPaddr());
+ gpuDynInst->wfSlotId, pkt->req->getPaddr());
} else {
DPRINTF(GPUPort,
- "CU%d: WF[%d][%d]: gpuDynInst: %d, index %d, addr %#x data "
+ "CU%d: WF[%d][%d]: gpuDynInst: %d, addr %#x data "
"req sent!\n", compute_unit->cu_id, gpuDynInst->simdId,
gpuDynInst->wfSlotId, gpuDynInst->seqNum(),
- scalarDataPort->index, pkt->req->getPaddr());
+ pkt->req->getPaddr());
}
}
req_pkt->senderState =
new ComputeUnit::ScalarDataPort::SenderState(gpuDynInst);
- if (!computeUnit->scalarDataPort->sendTimingReq(req_pkt)) {
- computeUnit->scalarDataPort->retries.push_back(req_pkt);
+ if (!computeUnit->scalarDataPort.sendTimingReq(req_pkt)) {
+ computeUnit->scalarDataPort.retries.push_back(req_pkt);
DPRINTF(GPUMem, "send scalar req failed for: %s\n",
gpuDynInst->disassemble());
} else {
// This is the SenderState needed upon return
newPacket->senderState = new LDSPort::SenderState(gpuDynInst);
- return ldsPort->sendTimingReq(newPacket);
+ return ldsPort.sendTimingReq(newPacket);
}
/**
void doSmReturn(GPUDynInstPtr gpuDynInst);
virtual void init() override;
- void sendRequest(GPUDynInstPtr gpuDynInst, int index, PacketPtr pkt);
+ void sendRequest(GPUDynInstPtr gpuDynInst, PortID index, PacketPtr pkt);
void sendScalarRequest(GPUDynInstPtr gpuDynInst, PacketPtr pkt);
void injectGlobalMemFence(GPUDynInstPtr gpuDynInst,
bool kernelMemSync,
class DataPort : public RequestPort
{
public:
- DataPort(const std::string &_name, ComputeUnit *_cu, PortID _index)
- : RequestPort(_name, _cu), computeUnit(_cu),
- index(_index) { }
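+ // The lane index is now passed to (and stored by) the Port base class,
+ // replacing the duplicate index member that used to live here.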
+ DataPort(const std::string &_name, ComputeUnit *_cu, PortID id)
+ : RequestPort(_name, _cu, id), computeUnit(_cu) { }
bool snoopRangeSent;
struct SenderState : public Packet::SenderState
{
GPUDynInstPtr _gpuDynInst;
- int port_index;
+ PortID port_index;
Packet::SenderState *saved;
SenderState(GPUDynInstPtr gpuDynInst, PortID _port_index,
protected:
ComputeUnit *computeUnit;
- int index;
virtual bool recvTimingResp(PacketPtr pkt);
virtual Tick recvAtomic(PacketPtr pkt) { return 0; }
class ScalarDataPort : public RequestPort
{
public:
- ScalarDataPort(const std::string &_name, ComputeUnit *_cu,
- PortID _index)
- : RequestPort(_name, _cu, _index), computeUnit(_cu), index(_index)
+ ScalarDataPort(const std::string &_name, ComputeUnit *_cu)
+ : RequestPort(_name, _cu), computeUnit(_cu)
{
- (void)index;
}
bool recvTimingResp(PacketPtr pkt) override;
class MemReqEvent : public Event
{
private:
- ScalarDataPort *scalarDataPort;
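+ // Held by reference: the port is a value member of the CU, so it
+ // outlives this AutoDelete event.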
+ ScalarDataPort &scalarDataPort;
PacketPtr pkt;
public:
- MemReqEvent(ScalarDataPort *_scalar_data_port, PacketPtr _pkt)
+ MemReqEvent(ScalarDataPort &_scalar_data_port, PacketPtr _pkt)
: Event(), scalarDataPort(_scalar_data_port), pkt(_pkt)
{
setFlags(Event::AutoDelete);
private:
ComputeUnit *computeUnit;
- PortID index;
};
// Instruction cache access port
class SQCPort : public RequestPort
{
public:
- SQCPort(const std::string &_name, ComputeUnit *_cu, PortID _index)
- : RequestPort(_name, _cu), computeUnit(_cu),
- index(_index) { }
+ SQCPort(const std::string &_name, ComputeUnit *_cu)
+ : RequestPort(_name, _cu), computeUnit(_cu) { }
bool snoopRangeSent;
protected:
ComputeUnit *computeUnit;
- int index;
virtual bool recvTimingResp(PacketPtr pkt);
virtual Tick recvAtomic(PacketPtr pkt) { return 0; }
class DTLBPort : public RequestPort
{
public:
- DTLBPort(const std::string &_name, ComputeUnit *_cu, PortID _index)
- : RequestPort(_name, _cu), computeUnit(_cu),
- index(_index), stalled(false)
+ DTLBPort(const std::string &_name, ComputeUnit *_cu, PortID id)
+ : RequestPort(_name, _cu, id), computeUnit(_cu),
+ stalled(false)
{ }
bool isStalled() { return stalled; }
// the lane in the memInst this sender state is associated with, so we
// send the memory request down the right port
- int portIndex;
+ PortID portIndex;
// constructor used for packets involved in timing accesses
SenderState(GPUDynInstPtr gpuDynInst, PortID port_index)
protected:
ComputeUnit *computeUnit;
- int index;
bool stalled;
virtual bool recvTimingResp(PacketPtr pkt);
class LDSPort : public RequestPort
{
public:
- LDSPort(const std::string &_name, ComputeUnit *_cu, PortID _id)
- : RequestPort(_name, _cu, _id), computeUnit(_cu)
+ LDSPort(const std::string &_name, ComputeUnit *_cu)
+ : RequestPort(_name, _cu), computeUnit(_cu)
{
}
/** The port to access the Local Data Store
* Can be connected to an LDS object
*/
- LDSPort *ldsPort = nullptr;
-
- LDSPort *
- getLdsPort() const
- {
- return ldsPort;
- }
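+ // Held by value now, and public, so the getLdsPort() accessor above is
+ // no longer needed.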
+ LDSPort ldsPort;
TokenManager *
getTokenManager()
/** The memory port for SIMD data accesses.
* Can be connected to PhysMem or Ruby for timing simulations
*/
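+ // All of the following ports are now held by value; they are constructed
+ // in the ComputeUnit constructor and handed out by getPort() below.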
- std::vector<DataPort*> memPort;
+ std::vector<DataPort> memPort;
// port to the TLB hierarchy (i.e., the L1 TLB)
- std::vector<DTLBPort*> tlbPort;
+ std::vector<DTLBPort> tlbPort;
// port to the scalar data cache
- ScalarDataPort *scalarDataPort;
+ ScalarDataPort scalarDataPort;
// port to the scalar data TLB
- ScalarDTLBPort *scalarDTLBPort;
+ ScalarDTLBPort scalarDTLBPort;
// port to the SQC (i.e. the I-cache)
- SQCPort *sqcPort;
+ SQCPort sqcPort;
// port to the SQC TLB (there's a separate TLB for each I-cache)
- ITLBPort *sqcTLBPort;
+ ITLBPort sqcTLBPort;
Port &
getPort(const std::string &if_name, PortID idx) override
{
- if (if_name == "memory_port") {
- memPort[idx] = new DataPort(csprintf("%s-port%d", name(), idx),
- this, idx);
- return *memPort[idx];
- } else if (if_name == "translation_port") {
- tlbPort[idx] = new DTLBPort(csprintf("%s-port%d", name(), idx),
- this, idx);
- return *tlbPort[idx];
+ if (if_name == "memory_port" && idx < memPort.size()) {
+ return memPort[idx];
+ } else if (if_name == "translation_port" && idx < tlbPort.size()) {
+ return tlbPort[idx];
} else if (if_name == "scalar_port") {
- scalarDataPort = new ScalarDataPort(csprintf("%s-port%d", name(),
- idx), this, idx);
- return *scalarDataPort;
+ return scalarDataPort;
} else if (if_name == "scalar_tlb_port") {
- scalarDTLBPort = new ScalarDTLBPort(csprintf("%s-port", name()),
- this);
- return *scalarDTLBPort;
+ return scalarDTLBPort;
} else if (if_name == "sqc_port") {
- sqcPort = new SQCPort(csprintf("%s-port%d", name(), idx),
- this, idx);
- return *sqcPort;
+ return sqcPort;
} else if (if_name == "sqc_tlb_port") {
- sqcTLBPort = new ITLBPort(csprintf("%s-port", name()), this);
- return *sqcTLBPort;
+ return sqcTLBPort;
} else if (if_name == "ldsPort") {
- if (ldsPort) {
- fatal("an LDS port was already allocated");
- }
- ldsPort = new LDSPort(csprintf("%s-port", name()), this, idx);
- return *ldsPort;
+ return ldsPort;
} else if (if_name == "gmTokenPort") {
return gmTokenPort;
} else {
- panic("incorrect port name");
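+ // Unknown port names now fall through to the base class, which
+ // reports the bad name itself.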
+ return ClockedObject::getPort(if_name, idx);
}
}