From 48e43c9ad1cd292b494f3d05f9d13845dd1a6d1e Mon Sep 17 00:00:00 2001 From: Michael LeBeane Date: Wed, 26 Oct 2016 22:48:37 -0400 Subject: [PATCH] ruby: Allow multiple outstanding DMA requests DMA sequencers and protocols can currently only issue one DMA access at a time. This patch implements the necessary functionality to support multiple outstanding DMA requests in Ruby. --- src/mem/protocol/MESI_Two_Level-dma.sm | 84 ++++++++++++++++++--- src/mem/protocol/MI_example-dma.sm | 84 +++++++++++++++++---- src/mem/protocol/MOESI_CMP_directory-dma.sm | 4 +- src/mem/protocol/MOESI_CMP_token-dma.sm | 82 ++++++++++++++++---- src/mem/protocol/MOESI_hammer-dma.sm | 84 +++++++++++++++++---- src/mem/protocol/RubySlicc_Types.sm | 4 +- src/mem/ruby/system/DMASequencer.cc | 84 ++++++++++++++------- src/mem/ruby/system/DMASequencer.hh | 23 ++++-- src/mem/ruby/system/Sequencer.py | 1 + 9 files changed, 357 insertions(+), 93 deletions(-) diff --git a/src/mem/protocol/MESI_Two_Level-dma.sm b/src/mem/protocol/MESI_Two_Level-dma.sm index 68cb7e968..ecda3bd03 100644 --- a/src/mem/protocol/MESI_Two_Level-dma.sm +++ b/src/mem/protocol/MESI_Two_Level-dma.sm @@ -50,15 +50,38 @@ machine(MachineType:DMA, "DMA Controller") Ack, desc="DMA write to memory completed"; } - State cur_state; + structure(TBE, desc="...") { + State TBEState, desc="Transient state"; + DataBlock DataBlk, desc="Data"; + } + + structure(TBETable, external = "yes") { + TBE lookup(Addr); + void allocate(Addr); + void deallocate(Addr); + bool isPresent(Addr); + } + + void set_tbe(TBE b); + void unset_tbe(); + void wakeUpAllBuffers(); + + TBETable TBEs, template="<DMA_TBE>", constructor="m_number_of_TBEs"; + Tick clockEdge(); - State getState(Addr addr) { - return cur_state; + State getState(TBE tbe, Addr addr) { + if (is_valid(tbe)) { + return tbe.TBEState; + } else { + return State:READY; + } } - void setState(Addr addr, State state) { - cur_state := state; + void setState(TBE tbe, Addr addr, State state) { + if (is_valid(tbe)) { 
+ tbe.TBEState := state; + } } AccessPermission getAccessPermission(Addr addr) { @@ -82,9 +105,9 @@ machine(MachineType:DMA, "DMA Controller") if (dmaRequestQueue_in.isReady(clockEdge())) { peek(dmaRequestQueue_in, SequencerMsg) { if (in_msg.Type == SequencerRequestType:LD ) { - trigger(Event:ReadRequest, in_msg.LineAddress); + trigger(Event:ReadRequest, in_msg.LineAddress, TBEs[in_msg.LineAddress]); } else if (in_msg.Type == SequencerRequestType:ST) { - trigger(Event:WriteRequest, in_msg.LineAddress); + trigger(Event:WriteRequest, in_msg.LineAddress, TBEs[in_msg.LineAddress]); } else { error("Invalid request type"); } @@ -96,9 +119,11 @@ machine(MachineType:DMA, "DMA Controller") if (dmaResponseQueue_in.isReady(clockEdge())) { peek( dmaResponseQueue_in, ResponseMsg) { if (in_msg.Type == CoherenceResponseType:ACK) { - trigger(Event:Ack, makeLineAddress(in_msg.addr)); + trigger(Event:Ack, makeLineAddress(in_msg.addr), + TBEs[makeLineAddress(in_msg.addr)]); } else if (in_msg.Type == CoherenceResponseType:DATA) { - trigger(Event:Data, makeLineAddress(in_msg.addr)); + trigger(Event:Data, makeLineAddress(in_msg.addr), + TBEs[makeLineAddress(in_msg.addr)]); } else { error("Invalid response type"); } @@ -133,15 +158,30 @@ machine(MachineType:DMA, "DMA Controller") } action(a_ackCallback, "a", desc="Notify dma controller that write request completed") { - dma_sequencer.ackCallback(); + dma_sequencer.ackCallback(address); } action(d_dataCallback, "d", desc="Write data to dma sequencer") { - peek (dmaResponseQueue_in, ResponseMsg) { - dma_sequencer.dataCallback(in_msg.DataBlk); + dma_sequencer.dataCallback(tbe.DataBlk, address); + } + + action(t_updateTBEData, "t", desc="Update TBE Data") { + assert(is_valid(tbe)); + peek( dmaResponseQueue_in, ResponseMsg) { + tbe.DataBlk := in_msg.DataBlk; } } + action(v_allocateTBE, "v", desc="Allocate TBE entry") { + TBEs.allocate(address); + set_tbe(TBEs[address]); + } + + action(w_deallocateTBE, "w", desc="Deallocate TBE entry") { + 
TBEs.deallocate(address); + unset_tbe(); + } + action(p_popRequestQueue, "p", desc="Pop request queue") { dmaRequestQueue_in.dequeue(clockEdge()); } @@ -150,23 +190,43 @@ machine(MachineType:DMA, "DMA Controller") dmaResponseQueue_in.dequeue(clockEdge()); } + action(zz_stallAndWaitRequestQueue, "zz", desc="...") { + stall_and_wait(dmaRequestQueue_in, address); + } + + action(wkad_wakeUpAllDependents, "wkad", desc="wake-up all dependents") { + wakeUpAllBuffers(); + } + transition(READY, ReadRequest, BUSY_RD) { + v_allocateTBE; s_sendReadRequest; p_popRequestQueue; } transition(READY, WriteRequest, BUSY_WR) { + v_allocateTBE; s_sendWriteRequest; p_popRequestQueue; } transition(BUSY_RD, Data, READY) { + t_updateTBEData; d_dataCallback; + w_deallocateTBE; p_popResponseQueue; + wkad_wakeUpAllDependents; } transition(BUSY_WR, Ack, READY) { a_ackCallback; + w_deallocateTBE; p_popResponseQueue; + wkad_wakeUpAllDependents; } + + transition({BUSY_RD,BUSY_WR}, {ReadRequest,WriteRequest}) { + zz_stallAndWaitRequestQueue; + } + } diff --git a/src/mem/protocol/MI_example-dma.sm b/src/mem/protocol/MI_example-dma.sm index 6032229ee..aebdce81c 100644 --- a/src/mem/protocol/MI_example-dma.sm +++ b/src/mem/protocol/MI_example-dma.sm @@ -50,17 +50,38 @@ machine(MachineType:DMA, "DMA Controller") Ack, desc="DMA write to memory completed"; } - State cur_state; + structure(TBE, desc="...") { + State TBEState, desc="Transient state"; + DataBlock DataBlk, desc="Data"; + } + + structure(TBETable, external = "yes") { + TBE lookup(Addr); + void allocate(Addr); + void deallocate(Addr); + bool isPresent(Addr); + } + + void set_tbe(TBE b); + void unset_tbe(); + void wakeUpAllBuffers(); + + TBETable TBEs, template="<DMA_TBE>", constructor="m_number_of_TBEs"; Tick clockEdge(); - Cycles ticksToCycles(Tick t); - State getState(Addr addr) { - return cur_state; + State getState(TBE tbe, Addr addr) { + if (is_valid(tbe)) { + return tbe.TBEState; + } else { + return State:READY; + } } - void setState(Addr addr, 
State state) { - cur_state := state; + void setState(TBE tbe, Addr addr, State state) { + if (is_valid(tbe)) { + tbe.TBEState := state; + } } AccessPermission getAccessPermission(Addr addr) { @@ -84,9 +105,9 @@ machine(MachineType:DMA, "DMA Controller") if (dmaRequestQueue_in.isReady(clockEdge())) { peek(dmaRequestQueue_in, SequencerMsg) { if (in_msg.Type == SequencerRequestType:LD ) { - trigger(Event:ReadRequest, in_msg.LineAddress); + trigger(Event:ReadRequest, in_msg.LineAddress, TBEs[in_msg.LineAddress]); } else if (in_msg.Type == SequencerRequestType:ST) { - trigger(Event:WriteRequest, in_msg.LineAddress); + trigger(Event:WriteRequest, in_msg.LineAddress, TBEs[in_msg.LineAddress]); } else { error("Invalid request type"); } @@ -98,9 +119,9 @@ machine(MachineType:DMA, "DMA Controller") if (dmaResponseQueue_in.isReady(clockEdge())) { peek( dmaResponseQueue_in, DMAResponseMsg) { if (in_msg.Type == DMAResponseType:ACK) { - trigger(Event:Ack, in_msg.LineAddress); + trigger(Event:Ack, in_msg.LineAddress, TBEs[in_msg.LineAddress]); } else if (in_msg.Type == DMAResponseType:DATA) { - trigger(Event:Data, in_msg.LineAddress); + trigger(Event:Data, in_msg.LineAddress, TBEs[in_msg.LineAddress]); } else { error("Invalid response type"); } @@ -139,17 +160,30 @@ machine(MachineType:DMA, "DMA Controller") } action(a_ackCallback, "a", desc="Notify dma controller that write request completed") { - peek (dmaResponseQueue_in, DMAResponseMsg) { - dma_sequencer.ackCallback(); - } + dma_sequencer.ackCallback(address); } action(d_dataCallback, "d", desc="Write data to dma sequencer") { - peek (dmaResponseQueue_in, DMAResponseMsg) { - dma_sequencer.dataCallback(in_msg.DataBlk); + dma_sequencer.dataCallback(tbe.DataBlk, address); + } + + action(t_updateTBEData, "t", desc="Update TBE Data") { + assert(is_valid(tbe)); + peek( dmaResponseQueue_in, DMAResponseMsg) { + tbe.DataBlk := in_msg.DataBlk; } } + action(v_allocateTBE, "v", desc="Allocate TBE entry") { + TBEs.allocate(address); + 
set_tbe(TBEs[address]); + } + + action(w_deallocateTBE, "w", desc="Deallocate TBE entry") { + TBEs.deallocate(address); + unset_tbe(); + } + action(p_popRequestQueue, "p", desc="Pop request queue") { dmaRequestQueue_in.dequeue(clockEdge()); } @@ -158,23 +192,43 @@ machine(MachineType:DMA, "DMA Controller") dmaResponseQueue_in.dequeue(clockEdge()); } + action(zz_stallAndWaitRequestQueue, "zz", desc="...") { + stall_and_wait(dmaRequestQueue_in, address); + } + + action(wkad_wakeUpAllDependents, "wkad", desc="wake-up all dependents") { + wakeUpAllBuffers(); + } + transition(READY, ReadRequest, BUSY_RD) { + v_allocateTBE; s_sendReadRequest; p_popRequestQueue; } transition(READY, WriteRequest, BUSY_WR) { + v_allocateTBE; s_sendWriteRequest; p_popRequestQueue; } transition(BUSY_RD, Data, READY) { + t_updateTBEData; d_dataCallback; + w_deallocateTBE; p_popResponseQueue; + wkad_wakeUpAllDependents; } transition(BUSY_WR, Ack, READY) { a_ackCallback; + w_deallocateTBE; p_popResponseQueue; + wkad_wakeUpAllDependents; } + + transition({BUSY_RD,BUSY_WR}, {ReadRequest,WriteRequest}) { + zz_stallAndWaitRequestQueue; + } + } diff --git a/src/mem/protocol/MOESI_CMP_directory-dma.sm b/src/mem/protocol/MOESI_CMP_directory-dma.sm index 5eb2f2587..ccc7f8790 100644 --- a/src/mem/protocol/MOESI_CMP_directory-dma.sm +++ b/src/mem/protocol/MOESI_CMP_directory-dma.sm @@ -184,7 +184,7 @@ machine(MachineType:DMA, "DMA Controller") } action(a_ackCallback, "a", desc="Notify dma controller that write request completed") { - dma_sequencer.ackCallback(); + dma_sequencer.ackCallback(address); } action(o_checkForCompletion, "o", desc="Check if we have received all the messages required for completion") { @@ -236,7 +236,7 @@ machine(MachineType:DMA, "DMA Controller") action(d_dataCallbackFromTBE, "/d", desc="data callback with data from TBE") { assert(is_valid(tbe)); - dma_sequencer.dataCallback(tbe.DataBlk); + dma_sequencer.dataCallback(tbe.DataBlk, address); } action(v_allocateTBE, "v", 
desc="Allocate TBE entry") { diff --git a/src/mem/protocol/MOESI_CMP_token-dma.sm b/src/mem/protocol/MOESI_CMP_token-dma.sm index 3b144771d..01152100b 100644 --- a/src/mem/protocol/MOESI_CMP_token-dma.sm +++ b/src/mem/protocol/MOESI_CMP_token-dma.sm @@ -52,16 +52,38 @@ machine(MachineType:DMA, "DMA Controller") Ack, desc="DMA write to memory completed"; } - State cur_state; + structure(TBE, desc="...") { + State TBEState, desc="Transient state"; + DataBlock DataBlk, desc="Data"; + } + + structure(TBETable, external = "yes") { + TBE lookup(Addr); + void allocate(Addr); + void deallocate(Addr); + bool isPresent(Addr); + } + + void set_tbe(TBE b); + void unset_tbe(); + void wakeUpAllBuffers(); + + TBETable TBEs, template="<DMA_TBE>", constructor="m_number_of_TBEs"; Tick clockEdge(); - State getState(Addr addr) { - return cur_state; + State getState(TBE tbe, Addr addr) { + if (is_valid(tbe)) { + return tbe.TBEState; + } else { + return State:READY; + } } - void setState(Addr addr, State state) { - cur_state := state; + void setState(TBE tbe, Addr addr, State state) { + if (is_valid(tbe)) { + tbe.TBEState := state; + } } AccessPermission getAccessPermission(Addr addr) { @@ -85,9 +107,9 @@ machine(MachineType:DMA, "DMA Controller") if (dmaRequestQueue_in.isReady(clockEdge())) { peek(dmaRequestQueue_in, SequencerMsg) { if (in_msg.Type == SequencerRequestType:LD ) { - trigger(Event:ReadRequest, in_msg.LineAddress); + trigger(Event:ReadRequest, in_msg.LineAddress, TBEs[in_msg.LineAddress]); } else if (in_msg.Type == SequencerRequestType:ST) { - trigger(Event:WriteRequest, in_msg.LineAddress); + trigger(Event:WriteRequest, in_msg.LineAddress, TBEs[in_msg.LineAddress]); } else { error("Invalid request type"); } @@ -99,9 +121,9 @@ machine(MachineType:DMA, "DMA Controller") if (dmaResponseQueue_in.isReady(clockEdge())) { peek( dmaResponseQueue_in, DMAResponseMsg) { if (in_msg.Type == DMAResponseType:ACK) { - trigger(Event:Ack, in_msg.LineAddress); + trigger(Event:Ack, 
in_msg.LineAddress, TBEs[in_msg.LineAddress]); } else if (in_msg.Type == DMAResponseType:DATA) { - trigger(Event:Data, in_msg.LineAddress); + trigger(Event:Data, in_msg.LineAddress, TBEs[in_msg.LineAddress]); } else { error("Invalid response type"); } @@ -140,17 +162,30 @@ machine(MachineType:DMA, "DMA Controller") } action(a_ackCallback, "a", desc="Notify dma controller that write request completed") { - peek (dmaResponseQueue_in, DMAResponseMsg) { - dma_sequencer.ackCallback(); - } + dma_sequencer.ackCallback(address); } action(d_dataCallback, "d", desc="Write data to dma sequencer") { - peek (dmaResponseQueue_in, DMAResponseMsg) { - dma_sequencer.dataCallback(in_msg.DataBlk); + dma_sequencer.dataCallback(tbe.DataBlk, address); + } + + action(t_updateTBEData, "t", desc="Update TBE Data") { + assert(is_valid(tbe)); + peek(dmaResponseQueue_in, DMAResponseMsg) { + tbe.DataBlk := in_msg.DataBlk; } } + action(v_allocateTBE, "v", desc="Allocate TBE entry") { + TBEs.allocate(address); + set_tbe(TBEs[address]); + } + + action(w_deallocateTBE, "w", desc="Deallocate TBE entry") { + TBEs.deallocate(address); + unset_tbe(); + } + action(p_popRequestQueue, "p", desc="Pop request queue") { dmaRequestQueue_in.dequeue(clockEdge()); } @@ -159,23 +194,42 @@ machine(MachineType:DMA, "DMA Controller") dmaResponseQueue_in.dequeue(clockEdge()); } + action(zz_stallAndWaitRequestQueue, "zz", desc="...") { + stall_and_wait(dmaRequestQueue_in, address); + } + + action(wkad_wakeUpAllDependents, "wkad", desc="wake-up all dependents") { + wakeUpAllBuffers(); + } + transition(READY, ReadRequest, BUSY_RD) { + v_allocateTBE; s_sendReadRequest; p_popRequestQueue; } transition(READY, WriteRequest, BUSY_WR) { + v_allocateTBE; s_sendWriteRequest; p_popRequestQueue; } transition(BUSY_RD, Data, READY) { + t_updateTBEData; d_dataCallback; + w_deallocateTBE; p_popResponseQueue; + wkad_wakeUpAllDependents; } transition(BUSY_WR, Ack, READY) { a_ackCallback; + w_deallocateTBE; p_popResponseQueue; + 
wkad_wakeUpAllDependents; + } + + transition({BUSY_RD,BUSY_WR}, {ReadRequest,WriteRequest}) { + zz_stallAndWaitRequestQueue; } } diff --git a/src/mem/protocol/MOESI_hammer-dma.sm b/src/mem/protocol/MOESI_hammer-dma.sm index 3592e9991..0e4b4f663 100644 --- a/src/mem/protocol/MOESI_hammer-dma.sm +++ b/src/mem/protocol/MOESI_hammer-dma.sm @@ -50,15 +50,38 @@ machine(MachineType:DMA, "DMA Controller") Ack, desc="DMA write to memory completed"; } - State cur_state; + structure(TBE, desc="...") { + State TBEState, desc="Transient state"; + DataBlock DataBlk, desc="Data"; + } + + structure(TBETable, external = "yes") { + TBE lookup(Addr); + void allocate(Addr); + void deallocate(Addr); + bool isPresent(Addr); + } + + void set_tbe(TBE b); + void unset_tbe(); + void wakeUpAllBuffers(); + + TBETable TBEs, template="<DMA_TBE>", constructor="m_number_of_TBEs"; Tick clockEdge(); - State getState(Addr addr) { - return cur_state; + State getState(TBE tbe, Addr addr) { + if (is_valid(tbe)) { + return tbe.TBEState; + } else { + return State:READY; + } } - void setState(Addr addr, State state) { - cur_state := state; + + void setState(TBE tbe, Addr addr, State state) { + if (is_valid(tbe)) { + tbe.TBEState := state; + } } AccessPermission getAccessPermission(Addr addr) { @@ -82,9 +105,9 @@ machine(MachineType:DMA, "DMA Controller") if (dmaRequestQueue_in.isReady(clockEdge())) { peek(dmaRequestQueue_in, SequencerMsg) { if (in_msg.Type == SequencerRequestType:LD ) { - trigger(Event:ReadRequest, in_msg.LineAddress); + trigger(Event:ReadRequest, in_msg.LineAddress, TBEs[in_msg.LineAddress]); } else if (in_msg.Type == SequencerRequestType:ST) { - trigger(Event:WriteRequest, in_msg.LineAddress); + trigger(Event:WriteRequest, in_msg.LineAddress, TBEs[in_msg.LineAddress]); } else { error("Invalid request type"); } @@ -96,9 +119,9 @@ machine(MachineType:DMA, "DMA Controller") if (dmaResponseQueue_in.isReady(clockEdge())) { peek( dmaResponseQueue_in, DMAResponseMsg) { if (in_msg.Type == 
DMAResponseType:ACK) { - trigger(Event:Ack, in_msg.LineAddress); + trigger(Event:Ack, in_msg.LineAddress, TBEs[in_msg.LineAddress]); } else if (in_msg.Type == DMAResponseType:DATA) { - trigger(Event:Data, in_msg.LineAddress); + trigger(Event:Data, in_msg.LineAddress, TBEs[in_msg.LineAddress]); } else { error("Invalid response type"); } @@ -137,17 +160,30 @@ machine(MachineType:DMA, "DMA Controller") } action(a_ackCallback, "a", desc="Notify dma controller that write request completed") { - peek (dmaResponseQueue_in, DMAResponseMsg) { - dma_sequencer.ackCallback(); - } + dma_sequencer.ackCallback(address); } action(d_dataCallback, "d", desc="Write data to dma sequencer") { - peek (dmaResponseQueue_in, DMAResponseMsg) { - dma_sequencer.dataCallback(in_msg.DataBlk); + dma_sequencer.dataCallback(tbe.DataBlk, address); + } + + action(t_updateTBEData, "t", desc="Update TBE Data") { + assert(is_valid(tbe)); + peek( dmaResponseQueue_in, DMAResponseMsg) { + tbe.DataBlk := in_msg.DataBlk; } } + action(v_allocateTBE, "v", desc="Allocate TBE entry") { + TBEs.allocate(address); + set_tbe(TBEs[address]); + } + + action(w_deallocateTBE, "w", desc="Deallocate TBE entry") { + TBEs.deallocate(address); + unset_tbe(); + } + action(p_popRequestQueue, "p", desc="Pop request queue") { dmaRequestQueue_in.dequeue(clockEdge()); } @@ -156,23 +192,43 @@ machine(MachineType:DMA, "DMA Controller") dmaResponseQueue_in.dequeue(clockEdge()); } + action(zz_stallAndWaitRequestQueue, "zz", desc="...") { + stall_and_wait(dmaRequestQueue_in, address); + } + + action(wkad_wakeUpAllDependents, "wkad", desc="wake-up all dependents") { + wakeUpAllBuffers(); + } + transition(READY, ReadRequest, BUSY_RD) { + v_allocateTBE; s_sendReadRequest; p_popRequestQueue; } transition(READY, WriteRequest, BUSY_WR) { + v_allocateTBE; s_sendWriteRequest; p_popRequestQueue; } transition(BUSY_RD, Data, READY) { + t_updateTBEData; d_dataCallback; + w_deallocateTBE; p_popResponseQueue; + wkad_wakeUpAllDependents; } 
transition(BUSY_WR, Ack, READY) { a_ackCallback; + w_deallocateTBE; p_popResponseQueue; + wkad_wakeUpAllDependents; } + + transition({BUSY_RD,BUSY_WR}, {ReadRequest,WriteRequest}) { + zz_stallAndWaitRequestQueue; + } + } diff --git a/src/mem/protocol/RubySlicc_Types.sm b/src/mem/protocol/RubySlicc_Types.sm index e8a739eb4..27a045d29 100644 --- a/src/mem/protocol/RubySlicc_Types.sm +++ b/src/mem/protocol/RubySlicc_Types.sm @@ -220,8 +220,8 @@ structure (WireBuffer, inport="yes", outport="yes", external = "yes") { } structure (DMASequencer, external = "yes") { - void ackCallback(); - void dataCallback(DataBlock); + void ackCallback(Addr); + void dataCallback(DataBlock,Addr); void recordRequestType(CacheRequestType); } diff --git a/src/mem/ruby/system/DMASequencer.cc b/src/mem/ruby/system/DMASequencer.cc index 3b0304158..4bee19b52 100644 --- a/src/mem/ruby/system/DMASequencer.cc +++ b/src/mem/ruby/system/DMASequencer.cc @@ -35,8 +35,18 @@ #include "mem/ruby/system/DMASequencer.hh" #include "mem/ruby/system/RubySystem.hh" +DMARequest::DMARequest(uint64_t start_paddr, int len, bool write, + int bytes_completed, int bytes_issued, uint8_t *data, + PacketPtr pkt) + : start_paddr(start_paddr), len(len), write(write), + bytes_completed(bytes_completed), bytes_issued(bytes_issued), data(data), + pkt(pkt) +{ +} + DMASequencer::DMASequencer(const Params *p) - : RubyPort(p) + : RubyPort(p), m_outstanding_count(0), + m_max_outstanding_requests(p->max_outstanding_requests) { } @@ -44,7 +54,6 @@ void DMASequencer::init() { RubyPort::init(); - m_is_busy = false; m_data_block_mask = mask(RubySystem::getBlockSizeBits()); for (const auto &s_port : slave_ports) @@ -54,7 +63,7 @@ DMASequencer::init() RequestStatus DMASequencer::makeRequest(PacketPtr pkt) { - if (m_is_busy) { + if (m_outstanding_count == m_max_outstanding_requests) { return RequestStatus_BufferFull; } @@ -63,21 +72,29 @@ DMASequencer::makeRequest(PacketPtr pkt) int len = pkt->getSize(); bool write = pkt->isWrite(); - 
assert(!m_is_busy); // only support one outstanding DMA request - m_is_busy = true; + assert(m_outstanding_count < m_max_outstanding_requests); + Addr line_addr = makeLineAddress(paddr); + auto emplace_pair = + m_RequestTable.emplace(std::piecewise_construct, + std::forward_as_tuple(line_addr), + std::forward_as_tuple(paddr, len, write, 0, + 0, data, pkt)); + DMARequest& active_request = emplace_pair.first->second; + + // This is pretty conservative. A regular Sequencer with a more beefy + // request table that can track multiple requests for a cache line should + // be used if a more aggressive policy is needed. + if (!emplace_pair.second) { + DPRINTF(RubyDma, "DMA aliased: addr %p, len %d\n", line_addr, len); + return RequestStatus_Aliased; + } - active_request.start_paddr = paddr; - active_request.write = write; - active_request.data = data; - active_request.len = len; - active_request.bytes_completed = 0; - active_request.bytes_issued = 0; - active_request.pkt = pkt; + DPRINTF(RubyDma, "DMA req created: addr %p, len %d\n", line_addr, len); std::shared_ptr<SequencerMsg> msg = std::make_shared<SequencerMsg>(clockEdge()); msg->getPhysicalAddress() = paddr; - msg->getLineAddress() = makeLineAddress(msg->getPhysicalAddress()); + msg->getLineAddress() = line_addr; msg->getType() = write ? 
SequencerRequestType_ST : SequencerRequestType_LD; int offset = paddr & m_data_block_mask; @@ -90,6 +107,8 @@ DMASequencer::makeRequest(PacketPtr pkt) } } + m_outstanding_count++; + assert(m_mandatory_q_ptr != NULL); m_mandatory_q_ptr->enqueue(msg, clockEdge(), cyclesToTicks(Cycles(1))); active_request.bytes_issued += msg->getLen(); @@ -98,18 +117,22 @@ DMASequencer::makeRequest(PacketPtr pkt) } void -DMASequencer::issueNext() +DMASequencer::issueNext(const Addr& address) { - assert(m_is_busy); + RequestTable::iterator i = m_RequestTable.find(address); + assert(i != m_RequestTable.end()); + + DMARequest &active_request = i->second; + + assert(m_outstanding_count <= m_max_outstanding_requests); active_request.bytes_completed = active_request.bytes_issued; if (active_request.len == active_request.bytes_completed) { - // - // Must unset the busy flag before calling back the dma port because - // the callback may cause a previously nacked request to be reissued - // - DPRINTF(RubyDma, "DMA request completed\n"); - m_is_busy = false; - ruby_hit_callback(active_request.pkt); + DPRINTF(RubyDma, "DMA request completed: addr %p, size %d\n", + address, active_request.len); + m_outstanding_count--; + PacketPtr pkt = active_request.pkt; + m_RequestTable.erase(i); + ruby_hit_callback(pkt); return; } @@ -146,9 +169,13 @@ DMASequencer::issueNext() } void -DMASequencer::dataCallback(const DataBlock & dblk) +DMASequencer::dataCallback(const DataBlock & dblk, const Addr& address) { - assert(m_is_busy); + + RequestTable::iterator i = m_RequestTable.find(address); + assert(i != m_RequestTable.end()); + + DMARequest &active_request = i->second; int len = active_request.bytes_issued - active_request.bytes_completed; int offset = 0; if (active_request.bytes_completed == 0) @@ -158,13 +185,16 @@ DMASequencer::dataCallback(const DataBlock & dblk) memcpy(&active_request.data[active_request.bytes_completed], dblk.getData(offset, len), len); } - issueNext(); + issueNext(address); } void 
-DMASequencer::ackCallback() +DMASequencer::ackCallback(const Addr& address) { - issueNext(); + RequestTable::iterator i = m_RequestTable.find(address); + assert(i != m_RequestTable.end()); + + issueNext(address); } void diff --git a/src/mem/ruby/system/DMASequencer.hh b/src/mem/ruby/system/DMASequencer.hh index 3b408e5ac..9f1f4e503 100644 --- a/src/mem/ruby/system/DMASequencer.hh +++ b/src/mem/ruby/system/DMASequencer.hh @@ -31,14 +31,19 @@ #include <memory> #include <ostream> +#include <unordered_map> #include "mem/protocol/DMASequencerRequestType.hh" +#include "mem/ruby/common/Address.hh" #include "mem/ruby/common/DataBlock.hh" #include "mem/ruby/system/RubyPort.hh" #include "params/DMASequencer.hh" struct DMARequest { + DMARequest(uint64_t start_paddr, int len, bool write, int bytes_completed, + int bytes_issued, uint8_t *data, PacketPtr pkt); + uint64_t start_paddr; int len; bool write; @@ -57,23 +62,27 @@ class DMASequencer : public RubyPort /* external interface */ RequestStatus makeRequest(PacketPtr pkt) override; - bool busy() { return m_is_busy;} - int outstandingCount() const override { return (m_is_busy ? 
1 : 0); } + bool busy() { return m_outstanding_count > 0; } + int outstandingCount() const override { return m_outstanding_count; } bool isDeadlockEventScheduled() const override { return false; } void descheduleDeadlockEvent() override {} /* SLICC callback */ - void dataCallback(const DataBlock & dblk); - void ackCallback(); + void dataCallback(const DataBlock &dblk, const Addr &addr); + void ackCallback(const Addr &addr); void recordRequestType(DMASequencerRequestType requestType); private: - void issueNext(); + void issueNext(const Addr &addr); - bool m_is_busy; uint64_t m_data_block_mask; - DMARequest active_request; + + typedef std::unordered_map<Addr, DMARequest> RequestTable; + RequestTable m_RequestTable; + + int m_outstanding_count; + int m_max_outstanding_requests; }; #endif // __MEM_RUBY_SYSTEM_DMASEQUENCER_HH__ diff --git a/src/mem/ruby/system/Sequencer.py b/src/mem/ruby/system/Sequencer.py index ed142e914..22d545d30 100644 --- a/src/mem/ruby/system/Sequencer.py +++ b/src/mem/ruby/system/Sequencer.py @@ -81,3 +81,4 @@ class RubySequencer(RubyPort): class DMASequencer(RubyPort): type = 'DMASequencer' cxx_header = "mem/ruby/system/DMASequencer.hh" + max_outstanding_requests = Param.Int(64, "max outstanding requests") -- 2.30.2