b03e771500632acbf4155b4f8d4dc2a6a571abb1
[gem5.git] / src / gpu-compute / tlb_coalescer.hh
1 /*
2 * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * For use for simulation and test purposes only
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright notice,
11 * this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright notice,
14 * this list of conditions and the following disclaimer in the documentation
15 * and/or other materials provided with the distribution.
16 *
17 * 3. Neither the name of the copyright holder nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 *
33 * Author: Lisa Hsu
34 */
35
36 #ifndef __TLB_COALESCER_HH__
37 #define __TLB_COALESCER_HH__
38
#include <deque>
#include <list>
#include <queue>
#include <string>
#include <unordered_map>
#include <vector>

#include "arch/generic/tlb.hh"
#include "arch/isa.hh"
#include "arch/isa_traits.hh"
#include "arch/x86/pagetable.hh"
#include "arch/x86/regs/segment.hh"
#include "base/misc.hh"
#include "base/statistics.hh"
#include "gpu-compute/gpu_tlb.hh"
#include "mem/mem_object.hh"
#include "mem/port.hh"
#include "mem/request.hh"
#include "params/TLBCoalescer.hh"
56
57 class BaseTLB;
58 class Packet;
59 class ThreadContext;
60
61 /**
62 * The TLBCoalescer is a MemObject sitting on the front side (CPUSide) of
63 * each TLB. It receives packets and issues coalesced requests to the
64 * TLB below it. It controls how requests are coalesced (the rules)
65 * and the permitted number of TLB probes per cycle (i.e., how many
66 * coalesced requests it feeds the TLB per cycle).
67 */
68 class TLBCoalescer : public MemObject
69 {
70 protected:
71 // TLB clock: will inherit clock from shader's clock period in terms
72 // of nuber of ticks of curTime (aka global simulation clock)
73 // The assignment of TLB clock from shader clock is done in the
74 // python config files.
75 int clock;
76
77 public:
78 typedef TLBCoalescerParams Params;
79 TLBCoalescer(const Params *p);
80 ~TLBCoalescer() { }
81
82 // Number of TLB probes per cycle. Parameterizable - default 2.
83 int TLBProbesPerCycle;
84
85 // Consider coalescing across that many ticks.
86 // Paraemterizable - default 1.
87 int coalescingWindow;
88
89 // Each coalesced request consists of multiple packets
90 // that all fall within the same virtual page
91 typedef std::vector<PacketPtr> coalescedReq;
92
93 // disables coalescing when true
94 bool disableCoalescing;
95
96 /*
97 * This is a hash map with <tick_index> as a key.
98 * It contains a vector of coalescedReqs per <tick_index>.
99 * Requests are buffered here until they can be issued to
100 * the TLB, at which point they are copied to the
101 * issuedTranslationsTable hash map.
102 *
103 * In terms of coalescing, we coalesce requests in a given
104 * window of x cycles by using tick_index = issueTime/x as a
105 * key, where x = coalescingWindow. issueTime is the issueTime
106 * of the pkt from the ComputeUnit's perspective, but another
107 * option is to change it to curTick(), so we coalesce based
108 * on the receive time.
109 */
110 typedef std::unordered_map<int64_t, std::vector<coalescedReq>> CoalescingFIFO;
111
112 CoalescingFIFO coalescerFIFO;
113
114 /*
115 * issuedTranslationsTabler: a hash_map indexed by virtual page
116 * address. Each hash_map entry has a vector of PacketPtr associated
117 * with it denoting the different packets that share an outstanding
118 * coalesced translation request for the same virtual page.
119 *
120 * The rules that determine which requests we can coalesce are
121 * specified in the canCoalesce() method.
122 */
123 typedef std::unordered_map<Addr, coalescedReq> CoalescingTable;
124
125 CoalescingTable issuedTranslationsTable;
126
127 // number of packets the coalescer receives
128 Stats::Scalar uncoalescedAccesses;
129 // number packets the coalescer send to the TLB
130 Stats::Scalar coalescedAccesses;
131
132 // Number of cycles the coalesced requests spend waiting in
133 // coalescerFIFO. For each packet the coalescer receives we take into
134 // account the number of all uncoalesced requests this pkt "represents"
135 Stats::Scalar queuingCycles;
136
137 // On average how much time a request from the
138 // uncoalescedAccesses that reaches the TLB
139 // spends waiting?
140 Stats::Scalar localqueuingCycles;
141 // localqueuingCycles/uncoalescedAccesses
142 Stats::Formula localLatency;
143
144 bool canCoalesce(PacketPtr pkt1, PacketPtr pkt2);
145 void updatePhysAddresses(PacketPtr pkt);
146 void regStats();
147
148 // Clock related functions. Maps to-and-from
149 // Simulation ticks and object clocks.
150 Tick frequency() const { return SimClock::Frequency / clock; }
151 Tick ticks(int numCycles) const { return (Tick)clock * numCycles; }
152 Tick curCycle() const { return curTick() / clock; }
153 Tick tickToCycles(Tick val) const { return val / clock;}
154
155 class CpuSidePort : public SlavePort
156 {
157 public:
158 CpuSidePort(const std::string &_name, TLBCoalescer *tlb_coalescer,
159 PortID _index)
160 : SlavePort(_name, tlb_coalescer), coalescer(tlb_coalescer),
161 index(_index) { }
162
163 protected:
164 TLBCoalescer *coalescer;
165 int index;
166
167 virtual bool recvTimingReq(PacketPtr pkt);
168 virtual Tick recvAtomic(PacketPtr pkt) { return 0; }
169 virtual void recvFunctional(PacketPtr pkt);
170 virtual void recvRangeChange() { }
171 virtual void recvReqRetry();
172
173 virtual void
174 recvRespRetry()
175 {
176 fatal("recvRespRetry() is not implemented in the TLB coalescer.\n");
177 }
178
179 virtual AddrRangeList getAddrRanges() const;
180 };
181
182 class MemSidePort : public MasterPort
183 {
184 public:
185 MemSidePort(const std::string &_name, TLBCoalescer *tlb_coalescer,
186 PortID _index)
187 : MasterPort(_name, tlb_coalescer), coalescer(tlb_coalescer),
188 index(_index) { }
189
190 std::deque<PacketPtr> retries;
191
192 protected:
193 TLBCoalescer *coalescer;
194 int index;
195
196 virtual bool recvTimingResp(PacketPtr pkt);
197 virtual Tick recvAtomic(PacketPtr pkt) { return 0; }
198 virtual void recvFunctional(PacketPtr pkt);
199 virtual void recvRangeChange() { }
200 virtual void recvReqRetry();
201
202 virtual void
203 recvRespRetry()
204 {
205 fatal("recvRespRetry() not implemented in TLB coalescer");
206 }
207 };
208
209 // Coalescer slave ports on the cpu Side
210 std::vector<CpuSidePort*> cpuSidePort;
211 // Coalescer master ports on the memory side
212 std::vector<MemSidePort*> memSidePort;
213
214 BaseMasterPort& getMasterPort(const std::string &if_name, PortID idx);
215 BaseSlavePort& getSlavePort(const std::string &if_name, PortID idx);
216
217 void processProbeTLBEvent();
218 /// This event issues the TLB probes
219 EventFunctionWrapper probeTLBEvent;
220
221 void processCleanupEvent();
222 /// The cleanupEvent is scheduled after a TLBEvent triggers
223 /// in order to free memory and do the required clean-up
224 EventFunctionWrapper cleanupEvent;
225
226 // this FIFO queue keeps track of the virt. page
227 // addresses that are pending cleanup
228 std::queue<Addr> cleanupQueue;
229 };
230
231 #endif // __TLB_COALESCER_HH__