dev-arm: Add a VExpress_GEM5_V2 platform with GICv3 support
[gem5.git] / src / dev / dma_device.cc
index 401f910acab03b3f56ebfe21ead8a0fc890f2d26..c445fbc779758fc772bbe54058f806f4da07fc67 100644
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012 ARM Limited
+ * Copyright (c) 2012, 2015, 2017 ARM Limited
  * All rights reserved.
  *
  * The license below extends only to copyright in the software and shall
  *
  * Authors: Ali Saidi
  *          Nathan Binkert
+ *          Andreas Hansson
+ *          Andreas Sandberg
  */
 
+#include "dev/dma_device.hh"
+
+#include <utility>
+
 #include "base/chunk_generator.hh"
 #include "debug/DMA.hh"
-#include "dev/dma_device.hh"
+#include "debug/Drain.hh"
+#include "mem/port_proxy.hh"
 #include "sim/system.hh"
 
-DmaPort::DmaPort(MemObject *dev, System *s, Tick min_backoff, Tick max_backoff)
-    : MasterPort(dev->name() + "-dma", dev), device(dev), sys(s),
-      masterId(s->getMasterId(dev->name())),
-      pendingCount(0), actionInProgress(0), drainEvent(NULL),
-      backoffTime(0), minBackoffDelay(min_backoff),
-      maxBackoffDelay(max_backoff), inRetry(false),
-      backoffEvent(this)
+DmaPort::DmaPort(MemObject *dev, System *s)
+    : MasterPort(dev->name() + ".dma", dev),
+      device(dev), sys(s), masterId(s->getMasterId(dev)),
+      sendEvent([this]{ sendDma(); }, dev->name()),
+      pendingCount(0), inRetry(false)
 { }
 
+void
+DmaPort::handleResp(PacketPtr pkt, Tick delay)
+{
+    // should always see a response with a sender state
+    assert(pkt->isResponse());
+
+    // get the DMA sender state
+    DmaReqState *state = dynamic_cast<DmaReqState*>(pkt->senderState);
+    assert(state);
+
+    DPRINTF(DMA, "Received response %s for addr: %#x size: %d nb: %d,"  \
+            " tot: %d sched %d\n",
+            pkt->cmdString(), pkt->getAddr(), pkt->req->getSize(),
+            state->numBytes, state->totBytes,
+            state->completionEvent ?
+            state->completionEvent->scheduled() : 0);
+
+    assert(pendingCount != 0);
+    pendingCount--;
+
+    // update the number of bytes received based on the request rather
+    // than the packet as the latter could be rounded up to line sizes
+    state->numBytes += pkt->req->getSize();
+    assert(state->totBytes >= state->numBytes);
+
+    // if we have reached the total number of bytes for this DMA
+    // request, then signal the completion and delete the state
+    if (state->totBytes == state->numBytes) {
+        if (state->completionEvent) {
+            delay += state->delay;
+            device->schedule(state->completionEvent, curTick() + delay);
+        }
+        delete state;
+    }
+
+    // delete the packet
+    delete pkt;
+
+    // we might be drained at this point, if so signal the drain event
+    if (pendingCount == 0)
+        signalDrainDone();
+}
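
As a concrete illustration of the accounting above (the numbers are illustrative, not taken from the patch): a 256-byte dmaAction() on a system with 64-byte cache lines is split into four chunk requests, so handleResp() sees numBytes advance through 64, 128, 192 and 256. Only on the final response does numBytes reach totBytes, and the completion event is then scheduled exactly once, with any accumulated delay added on top.
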
+
 bool
 DmaPort::recvTimingResp(PacketPtr pkt)
 {
-    if (pkt->wasNacked()) {
-        DPRINTF(DMA, "Received nacked %s addr %#x\n",
-                pkt->cmdString(), pkt->getAddr());
-
-        if (backoffTime < minBackoffDelay)
-            backoffTime = minBackoffDelay;
-        else if (backoffTime < maxBackoffDelay)
-            backoffTime <<= 1;
-
-        device->reschedule(backoffEvent, curTick() + backoffTime, true);
-
-        DPRINTF(DMA, "Backoff time set to %d ticks\n", backoffTime);
-
-        pkt->reinitNacked();
-        queueDma(pkt, true);
-    } else if (pkt->senderState) {
-        DmaReqState *state;
-        backoffTime >>= 2;
-
-        DPRINTF(DMA, "Received response %s addr %#x size %#x\n",
-                pkt->cmdString(), pkt->getAddr(), pkt->req->getSize());
-        state = dynamic_cast<DmaReqState*>(pkt->senderState);
-        pendingCount--;
-
-        assert(pendingCount >= 0);
-        assert(state);
-
-        // We shouldn't ever get a block in ownership state
-        assert(!(pkt->memInhibitAsserted() && !pkt->sharedAsserted()));
-
-        state->numBytes += pkt->req->getSize();
-        assert(state->totBytes >= state->numBytes);
-        if (state->totBytes == state->numBytes) {
-            if (state->completionEvent) {
-                if (state->delay)
-                    device->schedule(state->completionEvent,
-                                     curTick() + state->delay);
-                else
-                    state->completionEvent->process();
-            }
-            delete state;
-        }
-        delete pkt->req;
-        delete pkt;
+    // We shouldn't ever get a cacheable block in Modified state
+    assert(pkt->req->isUncacheable() ||
+           !(pkt->cacheResponding() && !pkt->hasSharers()));
 
-        if (pendingCount == 0 && drainEvent) {
-            drainEvent->process();
-            drainEvent = NULL;
-        }
-    }  else {
-        panic("Got packet without sender state... huh?\n");
-    }
+    handleResp(pkt);
 
     return true;
 }
 
 DmaDevice::DmaDevice(const Params *p)
-    : PioDevice(p), dmaPort(this, sys, params()->min_backoff_delay,
-                            params()->max_backoff_delay)
+    : PioDevice(p), dmaPort(this, sys)
 { }
 
 void
@@ -127,184 +127,384 @@ DmaDevice::init()
     PioDevice::init();
 }
 
-unsigned int
-DmaDevice::drain(Event *de)
+DrainState
+DmaPort::drain()
 {
-    unsigned int count;
-    count = pioPort.drain(de) + dmaPort.drain(de);
-    if (count)
-        changeState(Draining);
-    else
-        changeState(Drained);
-    return count;
-}
-
-unsigned int
-DmaPort::drain(Event *de)
-{
-    if (pendingCount == 0)
-        return 0;
-    drainEvent = de;
-    return 1;
+    if (pendingCount == 0) {
+        return DrainState::Drained;
+    } else {
+        DPRINTF(Drain, "DmaPort not drained\n");
+        return DrainState::Draining;
+    }
 }
 
 void
-DmaPort::recvRetry()
+DmaPort::recvReqRetry()
 {
     assert(transmitList.size());
-    bool result = true;
-    do {
-        PacketPtr pkt = transmitList.front();
-        DPRINTF(DMA, "Retry on %s addr %#x\n",
-                pkt->cmdString(), pkt->getAddr());
-        result = sendTimingReq(pkt);
-        if (result) {
-            DPRINTF(DMA, "-- Done\n");
-            transmitList.pop_front();
-            inRetry = false;
-        } else {
-            inRetry = true;
-            DPRINTF(DMA, "-- Failed, queued\n");
-        }
-    } while (!backoffTime &&  result && transmitList.size());
-
-    if (transmitList.size() && backoffTime && !inRetry) {
-        DPRINTF(DMA, "Scheduling backoff for %d\n", curTick()+backoffTime);
-        if (!backoffEvent.scheduled())
-            device->schedule(backoffEvent, backoffTime + curTick());
-    }
-    DPRINTF(DMA, "TransmitList: %d, backoffTime: %d inRetry: %d es: %d\n",
-            transmitList.size(), backoffTime, inRetry,
-            backoffEvent.scheduled());
+    trySendTimingReq();
 }
 
-void
+RequestPtr
 DmaPort::dmaAction(Packet::Command cmd, Addr addr, int size, Event *event,
                    uint8_t *data, Tick delay, Request::Flags flag)
 {
-    assert(device->getState() == SimObject::Running);
-
+    // one DMA request sender state for every action, that is then
+    // split into many requests and packets based on the block size,
+    // i.e. cache line size
     DmaReqState *reqState = new DmaReqState(event, size, delay);
 
+    // (functionality added for Table Walker statistics)
+    // We're only interested in this when there will only be one request.
+    // For simplicity, we return the last request, which would also be
+    // the only request in that case.
+    RequestPtr req = NULL;
 
     DPRINTF(DMA, "Starting DMA for addr: %#x size: %d sched: %d\n", addr, size,
-            event ? event->scheduled() : -1 );
-    for (ChunkGenerator gen(addr, size, peerBlockSize());
+            event ? event->scheduled() : -1);
+    for (ChunkGenerator gen(addr, size, sys->cacheLineSize());
          !gen.done(); gen.next()) {
-            Request *req = new Request(gen.addr(), gen.size(), flag, masterId);
-            PacketPtr pkt = new Packet(req, cmd);
 
-            // Increment the data pointer on a write
-            if (data)
-                pkt->dataStatic(data + gen.complete());
+        req = std::make_shared<Request>(
+            gen.addr(), gen.size(), flag, masterId);
+
+        req->taskId(ContextSwitchTaskId::DMA);
+        PacketPtr pkt = new Packet(req, cmd);
+
+        // Increment the data pointer on a write
+        if (data)
+            pkt->dataStatic(data + gen.complete());
 
-            pkt->senderState = reqState;
+        pkt->senderState = reqState;
 
-            assert(pendingCount >= 0);
-            pendingCount++;
-            DPRINTF(DMA, "--Queuing DMA for addr: %#x size: %d\n", gen.addr(),
-                    gen.size());
-            queueDma(pkt);
+        DPRINTF(DMA, "--Queuing DMA for addr: %#x size: %d\n", gen.addr(),
+                gen.size());
+        queueDma(pkt);
     }
 
+    // in zero time also initiate the sending of the packets we have
+    // just created, for atomic this involves actually completing all
+    // the requests
+    sendDma();
+
+    return req;
 }
 
 void
-DmaPort::queueDma(PacketPtr pkt, bool front)
+DmaPort::queueDma(PacketPtr pkt)
 {
+    transmitList.push_back(pkt);
 
-    if (front)
-        transmitList.push_front(pkt);
-    else
-        transmitList.push_back(pkt);
-    sendDma();
+    // remember that we have another packet pending, this will only be
+    // decremented once a response comes back
+    pendingCount++;
+}
+
+void
+DmaPort::trySendTimingReq()
+{
+    // send the first packet on the transmit list and schedule the
+    // following send if it is successful
+    PacketPtr pkt = transmitList.front();
+
+    DPRINTF(DMA, "Trying to send %s addr %#x\n", pkt->cmdString(),
+            pkt->getAddr());
+
+    inRetry = !sendTimingReq(pkt);
+    if (!inRetry) {
+        transmitList.pop_front();
+        DPRINTF(DMA, "-- Done\n");
+        // if there is more to do, then do so
+        if (!transmitList.empty())
+            // this should ultimately wait for as many cycles as the
+            // device needs to send the packet, but currently the port
+            // does not have any known width so simply wait a single
+            // cycle
+            device->schedule(sendEvent, device->clockEdge(Cycles(1)));
+    } else {
+        DPRINTF(DMA, "-- Failed, waiting for retry\n");
+    }
+
+    DPRINTF(DMA, "TransmitList: %d, inRetry: %d\n",
+            transmitList.size(), inRetry);
 }
 
 void
 DmaPort::sendDma()
 {
-    // some kind of selction between access methods
+    // some kind of selection between access methods
     // more work is going to have to be done to make
     // switching actually work
     assert(transmitList.size());
-    PacketPtr pkt = transmitList.front();
 
-    Enums::MemoryMode state = sys->getMemoryMode();
-    if (state == Enums::timing) {
-        if (backoffEvent.scheduled() || inRetry) {
-            DPRINTF(DMA, "Can't send immediately, waiting for retry or backoff timer\n");
+    if (sys->isTimingMode()) {
+        // if we are either waiting for a retry or are still waiting
+        // after sending the last packet, then do not proceed
+        if (inRetry || sendEvent.scheduled()) {
+            DPRINTF(DMA, "Can't send immediately, waiting to send\n");
             return;
         }
 
-        DPRINTF(DMA, "Attempting to send %s addr %#x\n",
-                pkt->cmdString(), pkt->getAddr());
-
-        bool result;
-        do {
-            result = sendTimingReq(pkt);
-            if (result) {
-                transmitList.pop_front();
-                DPRINTF(DMA, "-- Done\n");
-            } else {
-                inRetry = true;
-                DPRINTF(DMA, "-- Failed: queued\n");
-            }
-        } while (result && !backoffTime && transmitList.size());
-
-        if (transmitList.size() && backoffTime && !inRetry &&
-                !backoffEvent.scheduled()) {
-            DPRINTF(DMA, "-- Scheduling backoff timer for %d\n",
-                    backoffTime+curTick());
-            device->schedule(backoffEvent, backoffTime + curTick());
-        }
-    } else if (state == Enums::atomic) {
-        transmitList.pop_front();
+        trySendTimingReq();
+    } else if (sys->isAtomicMode()) {
+        // send everything there is to send in zero time
+        while (!transmitList.empty()) {
+            PacketPtr pkt = transmitList.front();
+            transmitList.pop_front();
+
+            DPRINTF(DMA, "Sending  DMA for addr: %#x size: %d\n",
+                    pkt->req->getPaddr(), pkt->req->getSize());
+            Tick lat = sendAtomic(pkt);
 
-        Tick lat;
-        DPRINTF(DMA, "--Sending  DMA for addr: %#x size: %d\n",
-                pkt->req->getPaddr(), pkt->req->getSize());
-        lat = sendAtomic(pkt);
-        assert(pkt->senderState);
-        DmaReqState *state = dynamic_cast<DmaReqState*>(pkt->senderState);
-        assert(state);
-        state->numBytes += pkt->req->getSize();
-
-        DPRINTF(DMA, "--Received response for  DMA for addr: %#x size: %d nb: %d, tot: %d sched %d\n",
-                pkt->req->getPaddr(), pkt->req->getSize(), state->numBytes,
-                state->totBytes,
-                state->completionEvent ? state->completionEvent->scheduled() : 0 );
-
-        if (state->totBytes == state->numBytes) {
-            if (state->completionEvent) {
-                assert(!state->completionEvent->scheduled());
-                device->schedule(state->completionEvent,
-                                 curTick() + lat + state->delay);
-            }
-            delete state;
-            delete pkt->req;
+            handleResp(pkt, lat);
         }
-        pendingCount--;
-        assert(pendingCount >= 0);
-        delete pkt;
+    } else
+        panic("Unknown memory mode.");
+}
 
-        if (pendingCount == 0 && drainEvent) {
-            drainEvent->process();
-            drainEvent = NULL;
+BaseMasterPort &
+DmaDevice::getMasterPort(const std::string &if_name, PortID idx)
+{
+    if (if_name == "dma") {
+        return dmaPort;
+    }
+    return PioDevice::getMasterPort(if_name, idx);
+}
+
+
+DmaReadFifo::DmaReadFifo(DmaPort &_port, size_t size,
+                         unsigned max_req_size,
+                         unsigned max_pending,
+                         Request::Flags flags)
+    : maxReqSize(max_req_size), fifoSize(size),
+      reqFlags(flags), port(_port),
+      buffer(size),
+      nextAddr(0), endAddr(0)
+{
+    freeRequests.resize(max_pending);
+    for (auto &e : freeRequests)
+        e.reset(new DmaDoneEvent(this, max_req_size));
+}
+
+DmaReadFifo::~DmaReadFifo()
+{
+    for (auto &p : pendingRequests) {
+        DmaDoneEvent *e(p.release());
+
+        if (e->done()) {
+            delete e;
+        } else {
+            // We can't kill in-flight DMAs, so we'll just transfer
+            // ownership to the event queue so that they get freed
+            // when they are done.
+            e->kill();
         }
+    }
+}
+
+void
+DmaReadFifo::serialize(CheckpointOut &cp) const
+{
+    assert(pendingRequests.empty());
 
-   } else
-       panic("Unknown memory command state.");
+    SERIALIZE_CONTAINER(buffer);
+    SERIALIZE_SCALAR(endAddr);
+    SERIALIZE_SCALAR(nextAddr);
 }
 
-DmaDevice::~DmaDevice()
+void
+DmaReadFifo::unserialize(CheckpointIn &cp)
 {
+    UNSERIALIZE_CONTAINER(buffer);
+    UNSERIALIZE_SCALAR(endAddr);
+    UNSERIALIZE_SCALAR(nextAddr);
 }
 
-MasterPort &
-DmaDevice::getMasterPort(const std::string &if_name, int idx)
+bool
+DmaReadFifo::tryGet(uint8_t *dst, size_t len)
 {
-    if (if_name == "dma") {
-        return dmaPort;
+    if (buffer.size() >= len) {
+        buffer.read(dst, len);
+        resumeFill();
+        return true;
+    } else {
+        return false;
     }
-    return PioDevice::getMasterPort(if_name, idx);
+}
+
+void
+DmaReadFifo::get(uint8_t *dst, size_t len)
+{
+    const bool success(tryGet(dst, len));
+    panic_if(!success, "Buffer underrun in DmaReadFifo::get()\n");
+}
+
+void
+DmaReadFifo::startFill(Addr start, size_t size)
+{
+    assert(atEndOfBlock());
+
+    nextAddr = start;
+    endAddr = start + size;
+    resumeFill();
+}
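
DmaReadFifo is designed to be subclassed by devices that stream data in from memory. A rough sketch of that pattern, using only the hooks visible in this file (StreamReader and the numeric sizes are hypothetical, and the flags argument is assumed to take its default from the header):

    class StreamReader : public DmaReadFifo
    {
      public:
        StreamReader(DmaPort &port)
            : DmaReadFifo(port, 4096 /* FIFO bytes */,
                          64 /* max bytes per request */,
                          8 /* max outstanding requests */) {}

        void begin(Addr addr, size_t len) { startFill(addr, len); }

        bool
        consume(uint8_t *dst, size_t len)
        {
            // returns false if the FIFO does not yet hold len bytes
            return tryGet(dst, len);
        }

      protected:
        // invoked once the last request for the current block has been issued
        void onEndOfBlock() override { /* e.g. start the next block */ }
        // invoked when no DMA requests remain outstanding
        void onIdle() override {}
    };

stopFill() can be used to abandon a block early; as the code below shows, responses for in-flight requests are then marked canceled rather than written into the FIFO.
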
+
+void
+DmaReadFifo::stopFill()
+{
+    // Prevent new DMA requests by setting the next address to the end
+    // address. Pending requests will still complete.
+    nextAddr = endAddr;
+
+    // Flag in-flight accesses as canceled. This prevents their data
+    // from being written to the FIFO.
+    for (auto &p : pendingRequests)
+        p->cancel();
+}
+
+void
+DmaReadFifo::resumeFill()
+{
+    // Don't try to fetch more data if we are draining. This ensures
+    // that the DMA engine settles down before we checkpoint it.
+    if (drainState() == DrainState::Draining)
+        return;
+
+    const bool old_eob(atEndOfBlock());
+
+    if (port.sys->bypassCaches())
+        resumeFillFunctional();
+    else
+        resumeFillTiming();
+
+    if (!old_eob && atEndOfBlock())
+        onEndOfBlock();
+}
+
+void
+DmaReadFifo::resumeFillFunctional()
+{
+    const size_t fifo_space = buffer.capacity() - buffer.size();
+    const size_t kvm_watermark = port.sys->cacheLineSize();
+    if (fifo_space >= kvm_watermark || buffer.capacity() < kvm_watermark) {
+        const size_t block_remaining = endAddr - nextAddr;
+        const size_t xfer_size = std::min(fifo_space, block_remaining);
+        std::vector<uint8_t> tmp_buffer(xfer_size);
+
+        assert(pendingRequests.empty());
+        DPRINTF(DMA, "KVM Bypassing startAddr=%#x xfer_size=%#x " \
+                "fifo_space=%#x block_remaining=%#x\n",
+                nextAddr, xfer_size, fifo_space, block_remaining);
+
+        port.sys->physProxy.readBlob(nextAddr, tmp_buffer.data(), xfer_size);
+        buffer.write(tmp_buffer.begin(), xfer_size);
+        nextAddr += xfer_size;
+    }
+}
+
+void
+DmaReadFifo::resumeFillTiming()
+{
+    size_t size_pending(0);
+    for (auto &e : pendingRequests)
+        size_pending += e->requestSize();
+
+    while (!freeRequests.empty() && !atEndOfBlock()) {
+        const size_t req_size(std::min(maxReqSize, endAddr - nextAddr));
+        if (buffer.size() + size_pending + req_size > fifoSize)
+            break;
+
+        DmaDoneEventUPtr event(std::move(freeRequests.front()));
+        freeRequests.pop_front();
+        assert(event);
+
+        event->reset(req_size);
+        port.dmaAction(MemCmd::ReadReq, nextAddr, req_size, event.get(),
+                       event->data(), 0, reqFlags);
+        nextAddr += req_size;
+        size_pending += req_size;
+
+        pendingRequests.emplace_back(std::move(event));
+    }
+}
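
To make the limits concrete (illustrative numbers): with a 4 KiB FIFO, a maxReqSize of 64 bytes and eight entries in freeRequests, at most eight 64-byte reads are outstanding at any time, and a further read is only issued while buffered bytes plus pending bytes plus 64 still fit within fifoSize.
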
+
+void
+DmaReadFifo::dmaDone()
+{
+    const bool old_active(isActive());
+
+    handlePending();
+    resumeFill();
+
+    if (old_active && !isActive())
+        onIdle();
+}
+
+void
+DmaReadFifo::handlePending()
+{
+    while (!pendingRequests.empty() && pendingRequests.front()->done()) {
+        // Get the first finished pending request
+        DmaDoneEventUPtr event(std::move(pendingRequests.front()));
+        pendingRequests.pop_front();
+
+        if (!event->canceled())
+            buffer.write(event->data(), event->requestSize());
+
+        // Move the event to the list of free requests
+        freeRequests.emplace_back(std::move(event));
+    }
+
+    if (pendingRequests.empty())
+        signalDrainDone();
+}
+
+DrainState
+DmaReadFifo::drain()
+{
+    return pendingRequests.empty() ? DrainState::Drained : DrainState::Draining;
+}
+
+
+DmaReadFifo::DmaDoneEvent::DmaDoneEvent(DmaReadFifo *_parent,
+                                        size_t max_size)
+    : parent(_parent), _done(false), _canceled(false), _data(max_size, 0)
+{
+}
+
+void
+DmaReadFifo::DmaDoneEvent::kill()
+{
+    parent = nullptr;
+    setFlags(AutoDelete);
+}
+
+void
+DmaReadFifo::DmaDoneEvent::cancel()
+{
+    _canceled = true;
+}
+
+void
+DmaReadFifo::DmaDoneEvent::reset(size_t size)
+{
+    assert(size <= _data.size());
+    _done = false;
+    _canceled = false;
+    _requestSize = size;
+}
+
+void
+DmaReadFifo::DmaDoneEvent::process()
+{
+    if (!parent)
+        return;
+
+    assert(!_done);
+    _done = true;
+    parent->dmaDone();
 }