From b811d3a34213787bef2758c7253e42d6b6c55f24 Mon Sep 17 00:00:00 2001 From: Tony Gutierrez Date: Thu, 7 Jun 2018 14:06:22 -0400 Subject: [PATCH] mem-ruby: Add DMA support to MOESI_AMD_Base-dir.sm This change adds DMA support to the MOESI_AMD_Base-dir.sm, which is needed to support ROCm apps/GCN3 ISA in the VIPER protocol. The DMA controller is copied from the MOESI_hammer-dma.sm with a few modifications. Change-Id: I56141436eee1c8f62c2a0915fa3b63b83bbcbc9a Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/29914 Reviewed-by: Anthony Gutierrez Maintainer: Anthony Gutierrez Tested-by: kokoro --- src/mem/ruby/protocol/GPU_VIPER.slicc | 1 + src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm | 221 ++++++++++++++++++- src/mem/ruby/protocol/MOESI_AMD_Base-dma.sm | 233 ++++++++++++++++++++ src/mem/ruby/protocol/MOESI_AMD_Base-msg.sm | 50 ++++- 4 files changed, 499 insertions(+), 6 deletions(-) create mode 100644 src/mem/ruby/protocol/MOESI_AMD_Base-dma.sm diff --git a/src/mem/ruby/protocol/GPU_VIPER.slicc b/src/mem/ruby/protocol/GPU_VIPER.slicc index 55ed6710a..196058b0d 100644 --- a/src/mem/ruby/protocol/GPU_VIPER.slicc +++ b/src/mem/ruby/protocol/GPU_VIPER.slicc @@ -2,6 +2,7 @@ protocol "GPU_VIPER"; include "RubySlicc_interfaces.slicc"; include "MOESI_AMD_Base-msg.sm"; include "MOESI_AMD_Base-dir.sm"; +include "MOESI_AMD_Base-dma.sm"; include "MOESI_AMD_Base-CorePair.sm"; include "GPU_VIPER-msg.sm"; include "GPU_VIPER-TCP.sm"; diff --git a/src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm b/src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm index efbffbd8c..c8dafd5a8 100644 --- a/src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm +++ b/src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm @@ -42,6 +42,10 @@ machine(MachineType:Directory, "AMD Baseline protocol") bool useL3OnWT := "False"; Cycles to_memory_controller_latency := 1; + // DMA + MessageBuffer * requestFromDMA, network="From", virtual_network="1", vnet_type="request"; + MessageBuffer * responseToDMA, network="To", virtual_network="3", 
vnet_type="request"; + // From the Cores MessageBuffer * requestFromCores, network="From", virtual_network="0", vnet_type="request"; MessageBuffer * responseFromCores, network="From", virtual_network="2", vnet_type="response"; @@ -63,13 +67,17 @@ machine(MachineType:Directory, "AMD Baseline protocol") // BL is Busy because it's possible for the data only to be in the network // in the WB, L3 has sent it and gone on with its business in possibly I // state. + BDR_M, AccessPermission:Backing_Store, desc="DMA read, blocked waiting for memory"; BS_M, AccessPermission:Backing_Store, desc="blocked waiting for memory"; BM_M, AccessPermission:Backing_Store, desc="blocked waiting for memory"; B_M, AccessPermission:Backing_Store, desc="blocked waiting for memory"; BP, AccessPermission:Backing_Store, desc="blocked waiting for probes, no need for memory"; + BDR_PM, AccessPermission:Backing_Store, desc="DMA read, blocked waiting for probes and memory"; BS_PM, AccessPermission:Backing_Store, desc="blocked waiting for probes and Memory"; BM_PM, AccessPermission:Backing_Store, desc="blocked waiting for probes and Memory"; B_PM, AccessPermission:Backing_Store, desc="blocked waiting for probes and Memory"; + BDW_P, AccessPermission:Backing_Store, desc="DMA write, blocked waiting for probes, no need for memory"; + BDR_Pm, AccessPermission:Backing_Store, desc="DMA read, blocked waiting for probes, already got memory"; BS_Pm, AccessPermission:Backing_Store, desc="blocked waiting for probes, already got memory"; BM_Pm, AccessPermission:Backing_Store, desc="blocked waiting for probes, already got memory"; B_Pm, AccessPermission:Backing_Store, desc="blocked waiting for probes, already got memory"; @@ -107,6 +115,10 @@ machine(MachineType:Directory, "AMD Baseline protocol") UnblockWriteThrough, desc="Unblock because of writethrough request finishing"; StaleVicDirty, desc="Core invalidated before VicDirty processed"; + + // DMA + DmaRead, desc="DMA read"; + DmaWrite, desc="DMA write"; } 
enumeration(RequestType, desc="To communicate stats from transitions to recordStats") { @@ -148,6 +160,7 @@ machine(MachineType:Directory, "AMD Baseline protocol") bool L3Hit, default="false", desc="Was this an L3 hit?"; uint64_t probe_id, desc="probe id for lifetime profiling"; WriteMask writeMask, desc="outstanding write through mask"; + int Len, desc="Length of memory request for DMA"; } structure(TBETable, external="yes") { @@ -266,6 +279,8 @@ machine(MachineType:Directory, "AMD Baseline protocol") } // ** OUT_PORTS ** + out_port(dmaResponseQueue_out, DMAResponseMsg, responseToDMA); + out_port(probeNetwork_out, NBProbeRequestMsg, probeToCore); out_port(responseNetwork_out, ResponseMsg, responseToCore); @@ -276,6 +291,23 @@ machine(MachineType:Directory, "AMD Baseline protocol") // ** IN_PORTS ** + // DMA Ports + in_port(dmaRequestQueue_in, DMARequestMsg, requestFromDMA, rank=6) { + if (dmaRequestQueue_in.isReady(clockEdge())) { + peek(dmaRequestQueue_in, DMARequestMsg) { + TBE tbe := TBEs.lookup(in_msg.LineAddress); + CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(in_msg.LineAddress)); + if (in_msg.Type == DMARequestType:READ) { + trigger(Event:DmaRead, in_msg.LineAddress, entry, tbe); + } else if (in_msg.Type == DMARequestType:WRITE) { + trigger(Event:DmaWrite, in_msg.LineAddress, entry, tbe); + } else { + error("Unknown DMA msg"); + } + } + } + } + // Trigger Queue in_port(triggerQueue_in, TriggerMsg, triggerQueue, rank=5) { if (triggerQueue_in.isReady(clockEdge())) { @@ -395,6 +427,25 @@ machine(MachineType:Directory, "AMD Baseline protocol") } // Actions + action(dd_sendResponseDmaData, "dd", desc="send DMA data response") { + enqueue(dmaResponseQueue_out, DMAResponseMsg, response_latency) { + out_msg.LineAddress := address; + out_msg.Type := DMAResponseType:DATA; + out_msg.Destination.add(tbe.OriginalRequestor); + out_msg.DataBlk := tbe.DataBlk; + out_msg.MessageSize := MessageSizeType:Response_Data; + } + } + + 
action(da_sendResponseDmaAck, "da", desc="send DMA data response") { + enqueue(dmaResponseQueue_out, DMAResponseMsg, response_latency) { + out_msg.LineAddress := address; + out_msg.Type := DMAResponseType:ACK; + out_msg.Destination.add(tbe.OriginalRequestor); + out_msg.MessageSize := MessageSizeType:Response_Control; + } + } + action(s_sendResponseS, "s", desc="send Shared response") { enqueue(responseNetwork_out, ResponseMsg, response_latency) { out_msg.addr := address; @@ -531,6 +582,29 @@ machine(MachineType:Directory, "AMD Baseline protocol") } } + action(qdr_queueDmaRdReq, "qdr", desc="Read data from memory for DMA") { + peek(dmaRequestQueue_in, DMARequestMsg) { + if (L3CacheMemory.isTagPresent(address)) { + enqueue(L3TriggerQueue_out, TriggerMsg, l3_hit_latency) { + out_msg.addr := address; + out_msg.Type := TriggerType:L3Hit; + } + CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(address)); + tbe.DataBlk := entry.DataBlk; + tbe.L3Hit := true; + tbe.MemData := true; + L3CacheMemory.deallocate(address); + } else { + enqueue(memQueue_out, MemoryMsg, to_memory_controller_latency) { + out_msg.addr := address; + out_msg.Type := MemoryRequestType:MEMORY_READ; + out_msg.Sender := machineID; + out_msg.MessageSize := MessageSizeType:Request_Control; + } + } + } + } + action(l_queueMemRdReq, "lr", desc="Read data from memory") { peek(requestNetwork_in, CPURequestMsg) { if (L3CacheMemory.isTagPresent(address)) { @@ -558,6 +632,40 @@ machine(MachineType:Directory, "AMD Baseline protocol") } } + action(icd_probeInvCoreDataForDMA, "icd", desc="Probe inv cores, return data for DMA") { + peek(dmaRequestQueue_in, DMARequestMsg) { + enqueue(probeNetwork_out, NBProbeRequestMsg, response_latency) { + out_msg.addr := address; + out_msg.Type := ProbeRequestType:PrbInv; + out_msg.ReturnData := true; + out_msg.MessageSize := MessageSizeType:Control; + out_msg.Destination.broadcast(MachineType:CorePair); + + // add relevant TCC node to list. 
This replaces all TCPs and SQCs + if (noTCCdir) { + out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC, + TCC_select_low_bit, TCC_select_num_bits)); + } else { + out_msg.Destination.add(mapAddressToRange(address, + MachineType:TCCdir, + TCC_select_low_bit, TCC_select_num_bits)); + } + out_msg.Destination.remove(in_msg.Requestor); + tbe.NumPendingAcks := out_msg.Destination.count(); + if (tbe.NumPendingAcks == 0) { + enqueue(triggerQueue_out, TriggerMsg, 1) { + out_msg.addr := address; + out_msg.Type := TriggerType:AcksComplete; + } + } + DPRINTF(RubySlicc, "%s\n", out_msg); + APPEND_TRANSITION_COMMENT(" dc: Acks remaining: "); + APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks); + tbe.ProbeRequestStartTime := curCycle(); + } + } + } + action(dc_probeInvCoreData, "dc", desc="probe inv cores, return data") { peek(requestNetwork_in, CPURequestMsg) { enqueue(probeNetwork_out, NBProbeRequestMsg, response_latency) { @@ -596,6 +704,42 @@ machine(MachineType:Directory, "AMD Baseline protocol") } } + action(scd_probeShrCoreDataForDma, "dsc", desc="probe shared cores, return data for DMA") { + peek(dmaRequestQueue_in, DMARequestMsg) { + enqueue(probeNetwork_out, NBProbeRequestMsg, response_latency) { + out_msg.addr := address; + out_msg.Type := ProbeRequestType:PrbDowngrade; + out_msg.ReturnData := true; + out_msg.MessageSize := MessageSizeType:Control; + out_msg.Destination.broadcast(MachineType:CorePair); + // add relevant TCC node to the list. 
This replaces all TCPs and SQCs + if (noTCCdir || CPUonly) { + //Don't need to notify TCC about reads + } else { + out_msg.Destination.add(mapAddressToRange(address, + MachineType:TCCdir, + TCC_select_low_bit, TCC_select_num_bits)); + } + if (noTCCdir && !CPUonly) { + out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC, + TCC_select_low_bit, TCC_select_num_bits)); + } + out_msg.Destination.remove(in_msg.Requestor); + tbe.NumPendingAcks := out_msg.Destination.count(); + if (tbe.NumPendingAcks == 0) { + enqueue(triggerQueue_out, TriggerMsg, 1) { + out_msg.addr := address; + out_msg.Type := TriggerType:AcksComplete; + } + } + DPRINTF(RubySlicc, "%s\n", (out_msg)); + APPEND_TRANSITION_COMMENT(" sc: Acks remaining: "); + APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks); + tbe.ProbeRequestStartTime := curCycle(); + } + } + } + action(sc_probeShrCoreData, "sc", desc="probe shared cores, return data") { peek(requestNetwork_in, CPURequestMsg) { // not the right network? enqueue(probeNetwork_out, NBProbeRequestMsg, response_latency) { @@ -681,6 +825,24 @@ machine(MachineType:Directory, "AMD Baseline protocol") } } + action(atd_allocateTBEforDMA, "atd", desc="allocate TBE Entry for DMA") { + check_allocate(TBEs); + peek(dmaRequestQueue_in, DMARequestMsg) { + TBEs.allocate(address); + set_tbe(TBEs.lookup(address)); + tbe.OriginalRequestor := in_msg.Requestor; + tbe.NumPendingAcks := 0; + tbe.Dirty := false; + tbe.Len := in_msg.Len; + if (in_msg.Type == DMARequestType:WRITE) { + tbe.wtData := true; + tbe.Dirty := true; + tbe.DataBlk := in_msg.DataBlk; + tbe.writeMask.fillMask(); + } + } + } + action(t_allocateTBE, "t", desc="allocate TBE Entry") { check_allocate(TBEs); peek(requestNetwork_in, CPURequestMsg) { @@ -867,6 +1029,10 @@ machine(MachineType:Directory, "AMD Baseline protocol") L3CacheMemory.deallocate(address); } + action(pd_popDmaRequestQueue, "pd", desc="Pop DMA request queue") { + dmaRequestQueue_in.dequeue(clockEdge()); + } + action(p_popRequestQueue, 
"p", desc="pop request queue") { requestNetwork_in.dequeue(clockEdge()); } @@ -915,7 +1081,7 @@ machine(MachineType:Directory, "AMD Baseline protocol") } // TRANSITIONS - transition({BL, BS_M, BM_M, B_M, BP, BS_PM, BM_PM, B_PM, BS_Pm, BM_Pm, B_Pm, B}, {RdBlkS, RdBlkM, RdBlk, CtoD}) { + transition({BL, BDR_M, BS_M, BM_M, B_M, BP, BDR_PM, BDW_P, BS_PM, BM_PM, B_PM, BDR_Pm, BS_Pm, BM_Pm, B_Pm, B}, {RdBlkS, RdBlkM, RdBlk, CtoD}) { st_stallAndWaitRequest; } @@ -926,6 +1092,13 @@ machine(MachineType:Directory, "AMD Baseline protocol") // transitions from U + transition(U, DmaRead, BDR_PM) {L3TagArrayRead} { + atd_allocateTBEforDMA; + qdr_queueDmaRdReq; + scd_probeShrCoreDataForDma; + pd_popDmaRequestQueue; + } + transition(U, {RdBlkS}, BS_PM) {L3TagArrayRead} { t_allocateTBE; l_queueMemRdReq; @@ -933,6 +1106,13 @@ machine(MachineType:Directory, "AMD Baseline protocol") p_popRequestQueue; } + transition(U, DmaWrite, BDW_P) {L3TagArrayRead} { + atd_allocateTBEforDMA; + da_sendResponseDmaAck; + icd_probeInvCoreDataForDMA; + pd_popDmaRequestQueue; + } + transition(U, WriteThrough, BM_PM) {L3TagArrayRead, L3TagArrayWrite} { t_allocateTBE; w_sendResponseWBAck; @@ -998,15 +1178,15 @@ machine(MachineType:Directory, "AMD Baseline protocol") pr_popResponseQueue; } - transition({B, BS_M, BM_M, B_M, BP, BS_PM, BM_PM, B_PM, BS_Pm, BM_Pm, B_Pm}, {VicDirty, VicClean}) { + transition({B, BDR_M, BS_M, BM_M, B_M, BP, BDR_PM, BDW_P, BS_PM, BM_PM, B_PM, BDR_Pm, BS_Pm, BM_Pm, B_Pm}, {VicDirty, VicClean}) { z_stall; } - transition({U, BL, BS_M, BM_M, B_M, BP, BS_PM, BM_PM, B_PM, BS_Pm, BM_Pm, B_Pm, B}, WBAck) { + transition({U, BL, BDR_M, BS_M, BM_M, B_M, BP, BDR_PM, BDW_P, BS_PM, BM_PM, B_PM, BDR_Pm, BS_Pm, BM_Pm, B_Pm, B}, WBAck) { pm_popMemQueue; } - transition({U, BL, BS_M, BM_M, B_M, BP, BS_PM, BM_PM, B_PM, BS_Pm, BM_Pm, B_Pm, B}, StaleVicDirty) { + transition({U, BL, BDR_M, BS_M, BM_M, B_M, BP, BDR_PM, BDW_P, BS_PM, BM_PM, B_PM, BDR_Pm, BS_Pm, BM_Pm, B_Pm, B}, StaleVicDirty) { 
rv_removeVicDirtyIgnore; w_sendResponseWBAck; p_popRequestQueue; @@ -1022,6 +1202,11 @@ machine(MachineType:Directory, "AMD Baseline protocol") pt_popTriggerQueue; } + transition(BDR_PM, MemData, BDR_Pm) { + mt_writeMemDataToTBE; + pm_popMemQueue; + } + transition(BS_PM, MemData, BS_Pm) {} { mt_writeMemDataToTBE; pm_popMemQueue; @@ -1037,6 +1222,10 @@ machine(MachineType:Directory, "AMD Baseline protocol") pm_popMemQueue; } + transition(BDR_PM, L3Hit, BDR_Pm) { + ptl_popTriggerQueue; + } + transition(BS_PM, L3Hit, BS_Pm) {} { ptl_popTriggerQueue; } @@ -1049,6 +1238,13 @@ machine(MachineType:Directory, "AMD Baseline protocol") ptl_popTriggerQueue; } + transition(BDR_M, MemData, U) { + mt_writeMemDataToTBE; + dd_sendResponseDmaData; + dt_deallocateTBE; + pm_popMemQueue; + } + transition(BS_M, MemData, B){L3TagArrayWrite, L3DataArrayWrite} { mt_writeMemDataToTBE; s_sendResponseS; @@ -1100,13 +1296,17 @@ machine(MachineType:Directory, "AMD Baseline protocol") ptl_popTriggerQueue; } - transition({BS_PM, BM_PM, B_PM, BS_Pm, BM_Pm, B_Pm, BP}, CPUPrbResp) { + transition({BDR_PM, BS_PM, BDW_P, BM_PM, B_PM, BDR_Pm, BS_Pm, BM_Pm, B_Pm, BP}, CPUPrbResp) { y_writeProbeDataToTBE; x_decrementAcks; o_checkForCompletion; pr_popResponseQueue; } + transition(BDR_PM, ProbeAcksComplete, BDR_M) { + pt_popTriggerQueue; + } + transition(BS_PM, ProbeAcksComplete, BS_M) {} { sf_setForwardReqTime; pt_popTriggerQueue; @@ -1122,6 +1322,17 @@ machine(MachineType:Directory, "AMD Baseline protocol") pt_popTriggerQueue; } + transition(BDW_P, ProbeAcksComplete, U) { + dt_deallocateTBE; + pt_popTriggerQueue; + } + + transition(BDR_Pm, ProbeAcksComplete, U) { + dd_sendResponseDmaData; + dt_deallocateTBE; + pt_popTriggerQueue; + } + transition(BS_Pm, ProbeAcksComplete, B){L3DataArrayWrite, L3TagArrayWrite} { sf_setForwardReqTime; s_sendResponseS; diff --git a/src/mem/ruby/protocol/MOESI_AMD_Base-dma.sm b/src/mem/ruby/protocol/MOESI_AMD_Base-dma.sm new file mode 100644 index 000000000..dbecabd1c --- 
/dev/null +++ b/src/mem/ruby/protocol/MOESI_AMD_Base-dma.sm @@ -0,0 +1,233 @@ +/* + * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +machine(MachineType:DMA, "DMA Controller") + : DMASequencer * dma_sequencer; + Cycles request_latency := 6; + + MessageBuffer * responseFromDir, network="From", virtual_network="3", + vnet_type="response"; + MessageBuffer * requestToDir, network="To", virtual_network="1", + vnet_type="request"; + MessageBuffer * mandatoryQueue; +{ + state_declaration(State, desc="DMA states", default="DMA_State_READY") { + READY, AccessPermission:Invalid, desc="Ready to accept a new request"; + BUSY_RD, AccessPermission:Busy, desc="Busy: currently processing a request"; + BUSY_WR, AccessPermission:Busy, desc="Busy: currently processing a request"; + } + + enumeration(Event, desc="DMA events") { + ReadRequest, desc="A new read request"; + WriteRequest, desc="A new write request"; + Data, desc="Data from a DMA memory read"; + Ack, desc="DMA write to memory completed"; + } + + structure(TBE, desc="...") { + State TBEState, desc="Transient state"; + DataBlock DataBlk, desc="Data"; + } + + structure(TBETable, external = "yes") { + TBE lookup(Addr); + void allocate(Addr); + void deallocate(Addr); + bool isPresent(Addr); + } + + void set_tbe(TBE b); + void unset_tbe(); + void wakeUpAllBuffers(); + + TBETable TBEs, template="", constructor="m_number_of_TBEs"; + + Tick clockEdge(); + MachineID mapAddressToMachine(Addr addr, MachineType mtype); + + State getState(TBE tbe, Addr addr) { + if (is_valid(tbe)) { + return tbe.TBEState; + } else { + return State:READY; + } + } + + void setState(TBE tbe, Addr addr, State state) { + if (is_valid(tbe)) { + tbe.TBEState := state; + } + } + + AccessPermission getAccessPermission(Addr addr) { + return AccessPermission:NotPresent; + } + + void setAccessPermission(Addr addr, State state) { + } + + void functionalRead(Addr addr, Packet *pkt) { + error("DMA does not support functional read."); + } + + int functionalWrite(Addr addr, Packet *pkt) { + error("DMA does not support functional write."); + } + + out_port(requestToDir_out, DMARequestMsg, 
requestToDir, desc="..."); + + in_port(dmaRequestQueue_in, SequencerMsg, mandatoryQueue, desc="...") { + if (dmaRequestQueue_in.isReady(clockEdge())) { + peek(dmaRequestQueue_in, SequencerMsg) { + if (in_msg.Type == SequencerRequestType:LD ) { + trigger(Event:ReadRequest, in_msg.LineAddress, TBEs[in_msg.LineAddress]); + } else if (in_msg.Type == SequencerRequestType:ST) { + trigger(Event:WriteRequest, in_msg.LineAddress, TBEs[in_msg.LineAddress]); + } else { + error("Invalid request type"); + } + } + } + } + + in_port(dmaResponseQueue_in, DMAResponseMsg, responseFromDir, desc="...") { + if (dmaResponseQueue_in.isReady(clockEdge())) { + peek( dmaResponseQueue_in, DMAResponseMsg) { + if (in_msg.Type == DMAResponseType:ACK) { + trigger(Event:Ack, in_msg.LineAddress, TBEs[in_msg.LineAddress]); + } else if (in_msg.Type == DMAResponseType:DATA) { + trigger(Event:Data, in_msg.LineAddress, TBEs[in_msg.LineAddress]); + } else { + error("Invalid response type"); + } + } + } + } + + action(s_sendReadRequest, "s", desc="Send a DMA read request to memory") { + peek(dmaRequestQueue_in, SequencerMsg) { + enqueue(requestToDir_out, DMARequestMsg, request_latency) { + out_msg.PhysicalAddress := in_msg.PhysicalAddress; + out_msg.LineAddress := in_msg.LineAddress; + out_msg.Type := DMARequestType:READ; + out_msg.Requestor := machineID; + out_msg.Len := in_msg.Len; + out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory)); + out_msg.MessageSize := MessageSizeType:Writeback_Control; + } + } + } + + action(s_sendWriteRequest, "\s", desc="Send a DMA write request to memory") { + peek(dmaRequestQueue_in, SequencerMsg) { + enqueue(requestToDir_out, DMARequestMsg, request_latency) { + out_msg.PhysicalAddress := in_msg.PhysicalAddress; + out_msg.LineAddress := in_msg.LineAddress; + out_msg.Type := DMARequestType:WRITE; + out_msg.Requestor := machineID; + out_msg.DataBlk := in_msg.DataBlk; + out_msg.Len := in_msg.Len; + out_msg.Destination.add(mapAddressToMachine(address, 
MachineType:Directory)); + out_msg.MessageSize := MessageSizeType:Writeback_Control; + } + } + } + + action(a_ackCallback, "a", desc="Notify dma controller that write request completed") { + dma_sequencer.ackCallback(address); + } + + action(d_dataCallback, "d", desc="Write data to dma sequencer") { + dma_sequencer.dataCallback(tbe.DataBlk, address); + } + + action(t_updateTBEData, "t", desc="Update TBE Data") { + assert(is_valid(tbe)); + peek( dmaResponseQueue_in, DMAResponseMsg) { + tbe.DataBlk := in_msg.DataBlk; + } + } + + action(v_allocateTBE, "v", desc="Allocate TBE entry") { + TBEs.allocate(address); + set_tbe(TBEs[address]); + } + + action(w_deallocateTBE, "w", desc="Deallocate TBE entry") { + TBEs.deallocate(address); + unset_tbe(); + } + + action(p_popRequestQueue, "p", desc="Pop request queue") { + dmaRequestQueue_in.dequeue(clockEdge()); + } + + action(p_popResponseQueue, "\p", desc="Pop request queue") { + dmaResponseQueue_in.dequeue(clockEdge()); + } + + action(zz_stallAndWaitRequestQueue, "zz", desc="...") { + stall_and_wait(dmaRequestQueue_in, address); + } + + action(wkad_wakeUpAllDependents, "wkad", desc="wake-up all dependents") { + wakeUpAllBuffers(); + } + + transition(READY, ReadRequest, BUSY_RD) { + v_allocateTBE; + s_sendReadRequest; + p_popRequestQueue; + } + + transition(READY, WriteRequest, BUSY_WR) { + v_allocateTBE; + s_sendWriteRequest; + p_popRequestQueue; + } + + transition(BUSY_RD, Data, READY) { + t_updateTBEData; + d_dataCallback; + w_deallocateTBE; + p_popResponseQueue; + wkad_wakeUpAllDependents; + } + + transition(BUSY_WR, Ack, READY) { + a_ackCallback; + w_deallocateTBE; + p_popResponseQueue; + wkad_wakeUpAllDependents; + } + + transition({BUSY_RD,BUSY_WR}, {ReadRequest,WriteRequest}) { + zz_stallAndWaitRequestQueue; + } + +} diff --git a/src/mem/ruby/protocol/MOESI_AMD_Base-msg.sm b/src/mem/ruby/protocol/MOESI_AMD_Base-msg.sm index f0705192d..c0bd4fe63 100644 --- a/src/mem/ruby/protocol/MOESI_AMD_Base-msg.sm +++ 
b/src/mem/ruby/protocol/MOESI_AMD_Base-msg.sm @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010-2015 Advanced Micro Devices, Inc. + * Copyright (c) 2010-2015, 2018 Advanced Micro Devices, Inc. * All rights reserved. * * For use for simulation and test purposes only @@ -361,3 +361,51 @@ structure(FifoMsg, desc="...", interface="Message") { } } + +enumeration(DMARequestType, desc="...", default="DMARequestType_NULL") { + READ, desc="Memory Read"; + WRITE, desc="Memory Write"; + NULL, desc="Invalid"; +} + +enumeration(DMAResponseType, desc="...", default="DMAResponseType_NULL") { + DATA, desc="DATA read"; + ACK, desc="ACK write"; + NULL, desc="Invalid"; +} + +structure(DMARequestMsg, desc="...", interface="Message") { + DMARequestType Type, desc="Request type (read/write)"; + Addr PhysicalAddress, desc="Physical address for this request"; + Addr LineAddress, desc="Line address for this request"; + MachineID Requestor, desc="Node who initiated the request"; + NetDest Destination, desc="Destination"; + DataBlock DataBlk, desc="DataBlk attached to this request"; + int Len, desc="The length of the request"; + MessageSizeType MessageSize, desc="size category of the message"; + + bool functionalRead(Packet *pkt) { + return testAndRead(LineAddress, DataBlk, pkt); + } + + bool functionalWrite(Packet *pkt) { + return testAndWrite(LineAddress, DataBlk, pkt); + } +} + +structure(DMAResponseMsg, desc="...", interface="Message") { + DMAResponseType Type, desc="Response type (DATA/ACK)"; + Addr PhysicalAddress, desc="Physical address for this request"; + Addr LineAddress, desc="Line address for this request"; + NetDest Destination, desc="Destination"; + DataBlock DataBlk, desc="DataBlk attached to this request"; + MessageSizeType MessageSize, desc="size category of the message"; + + bool functionalRead(Packet *pkt) { + return testAndRead(LineAddress, DataBlk, pkt); + } + + bool functionalWrite(Packet *pkt) { + return testAndWrite(LineAddress, DataBlk, pkt); + } +} -- 2.30.2