gpu-compute,mem-ruby: Refactor GPU coalescer
[gem5.git] / src / gpu-compute / tlb_coalescer.hh
1 /*
2 * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * For use for simulation and test purposes only
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright notice,
11 * this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright notice,
14 * this list of conditions and the following disclaimer in the documentation
15 * and/or other materials provided with the distribution.
16 *
17 * 3. Neither the name of the copyright holder nor the names of its
18 * contributors may be used to endorse or promote products derived from this
19 * software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 */
33
34 #ifndef __TLB_COALESCER_HH__
35 #define __TLB_COALESCER_HH__
36
#include <deque>
#include <list>
#include <queue>
#include <string>
#include <unordered_map>
#include <vector>
41
42 #include "arch/generic/tlb.hh"
43 #include "arch/isa.hh"
44 #include "arch/isa_traits.hh"
45 #include "arch/x86/pagetable.hh"
46 #include "arch/x86/regs/segment.hh"
47 #include "base/logging.hh"
48 #include "base/statistics.hh"
49 #include "gpu-compute/gpu_tlb.hh"
50 #include "mem/port.hh"
51 #include "mem/request.hh"
52 #include "params/TLBCoalescer.hh"
53 #include "sim/clocked_object.hh"
54
55 class BaseTLB;
56 class Packet;
57 class ThreadContext;
58
59 /**
60 * The TLBCoalescer is a ClockedObject sitting on the front side (CPUSide) of
61 * each TLB. It receives packets and issues coalesced requests to the
62 * TLB below it. It controls how requests are coalesced (the rules)
63 * and the permitted number of TLB probes per cycle (i.e., how many
64 * coalesced requests it feeds the TLB per cycle).
65 */
66 class TLBCoalescer : public ClockedObject
67 {
68 protected:
69 // TLB clock: will inherit clock from shader's clock period in terms
70 // of nuber of ticks of curTime (aka global simulation clock)
71 // The assignment of TLB clock from shader clock is done in the
72 // python config files.
73 int clock;
74
75 public:
76 typedef TLBCoalescerParams Params;
77 TLBCoalescer(const Params *p);
78 ~TLBCoalescer() { }
79
80 // Number of TLB probes per cycle. Parameterizable - default 2.
81 int TLBProbesPerCycle;
82
83 // Consider coalescing across that many ticks.
84 // Paraemterizable - default 1.
85 int coalescingWindow;
86
87 // Each coalesced request consists of multiple packets
88 // that all fall within the same virtual page
89 typedef std::vector<PacketPtr> coalescedReq;
90
91 // disables coalescing when true
92 bool disableCoalescing;
93
94 /*
95 * This is a hash map with <tick_index> as a key.
96 * It contains a vector of coalescedReqs per <tick_index>.
97 * Requests are buffered here until they can be issued to
98 * the TLB, at which point they are copied to the
99 * issuedTranslationsTable hash map.
100 *
101 * In terms of coalescing, we coalesce requests in a given
102 * window of x cycles by using tick_index = issueTime/x as a
103 * key, where x = coalescingWindow. issueTime is the issueTime
104 * of the pkt from the ComputeUnit's perspective, but another
105 * option is to change it to curTick(), so we coalesce based
106 * on the receive time.
107 */
108 typedef std::unordered_map<int64_t, std::vector<coalescedReq>> CoalescingFIFO;
109
110 CoalescingFIFO coalescerFIFO;
111
112 /*
113 * issuedTranslationsTabler: a hash_map indexed by virtual page
114 * address. Each hash_map entry has a vector of PacketPtr associated
115 * with it denoting the different packets that share an outstanding
116 * coalesced translation request for the same virtual page.
117 *
118 * The rules that determine which requests we can coalesce are
119 * specified in the canCoalesce() method.
120 */
121 typedef std::unordered_map<Addr, coalescedReq> CoalescingTable;
122
123 CoalescingTable issuedTranslationsTable;
124
125 // number of packets the coalescer receives
126 Stats::Scalar uncoalescedAccesses;
127 // number packets the coalescer send to the TLB
128 Stats::Scalar coalescedAccesses;
129
130 // Number of cycles the coalesced requests spend waiting in
131 // coalescerFIFO. For each packet the coalescer receives we take into
132 // account the number of all uncoalesced requests this pkt "represents"
133 Stats::Scalar queuingCycles;
134
135 // On average how much time a request from the
136 // uncoalescedAccesses that reaches the TLB
137 // spends waiting?
138 Stats::Scalar localqueuingCycles;
139 // localqueuingCycles/uncoalescedAccesses
140 Stats::Formula localLatency;
141
142 bool canCoalesce(PacketPtr pkt1, PacketPtr pkt2);
143 void updatePhysAddresses(PacketPtr pkt);
144 void regStats() override;
145
146 // Clock related functions. Maps to-and-from
147 // Simulation ticks and object clocks.
148 Tick frequency() const { return SimClock::Frequency / clock; }
149 Tick ticks(int numCycles) const { return (Tick)clock * numCycles; }
150 Tick curCycle() const { return curTick() / clock; }
151 Tick tickToCycles(Tick val) const { return val / clock;}
152
153 class CpuSidePort : public SlavePort
154 {
155 public:
156 CpuSidePort(const std::string &_name, TLBCoalescer *tlb_coalescer,
157 PortID _index)
158 : SlavePort(_name, tlb_coalescer), coalescer(tlb_coalescer),
159 index(_index) { }
160
161 protected:
162 TLBCoalescer *coalescer;
163 int index;
164
165 virtual bool recvTimingReq(PacketPtr pkt);
166 virtual Tick recvAtomic(PacketPtr pkt) { return 0; }
167 virtual void recvFunctional(PacketPtr pkt);
168 virtual void recvRangeChange() { }
169 virtual void recvReqRetry();
170
171 virtual void
172 recvRespRetry()
173 {
174 fatal("recvRespRetry() is not implemented in the TLB coalescer.\n");
175 }
176
177 virtual AddrRangeList getAddrRanges() const;
178 };
179
180 class MemSidePort : public MasterPort
181 {
182 public:
183 MemSidePort(const std::string &_name, TLBCoalescer *tlb_coalescer,
184 PortID _index)
185 : MasterPort(_name, tlb_coalescer), coalescer(tlb_coalescer),
186 index(_index) { }
187
188 std::deque<PacketPtr> retries;
189
190 protected:
191 TLBCoalescer *coalescer;
192 int index;
193
194 virtual bool recvTimingResp(PacketPtr pkt);
195 virtual Tick recvAtomic(PacketPtr pkt) { return 0; }
196 virtual void recvFunctional(PacketPtr pkt);
197 virtual void recvRangeChange() { }
198 virtual void recvReqRetry();
199
200 virtual void
201 recvRespRetry()
202 {
203 fatal("recvRespRetry() not implemented in TLB coalescer");
204 }
205 };
206
207 // Coalescer slave ports on the cpu Side
208 std::vector<CpuSidePort*> cpuSidePort;
209 // Coalescer master ports on the memory side
210 std::vector<MemSidePort*> memSidePort;
211
212 Port &getPort(const std::string &if_name,
213 PortID idx=InvalidPortID) override;
214
215 void processProbeTLBEvent();
216 /// This event issues the TLB probes
217 EventFunctionWrapper probeTLBEvent;
218
219 void processCleanupEvent();
220 /// The cleanupEvent is scheduled after a TLBEvent triggers
221 /// in order to free memory and do the required clean-up
222 EventFunctionWrapper cleanupEvent;
223
224 // this FIFO queue keeps track of the virt. page
225 // addresses that are pending cleanup
226 std::queue<Addr> cleanupQueue;
227 };
228
229 #endif // __TLB_COALESCER_HH__