mem-garnet: Integration of HeteroGarnet
[gem5.git] / src / mem / ruby / network / garnet2.0 / SwitchAllocator.cc
1 /*
2 * Copyright (c) 2020 Inria
3 * Copyright (c) 2016 Georgia Institute of Technology
4 * Copyright (c) 2008 Princeton University
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are
9 * met: redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer;
11 * redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution;
14 * neither the name of the copyright holders nor the names of its
15 * contributors may be used to endorse or promote products derived from
16 * this software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31
32 #include "mem/ruby/network/garnet2.0/SwitchAllocator.hh"
33
34 #include "debug/RubyNetwork.hh"
35 #include "mem/ruby/network/garnet2.0/GarnetNetwork.hh"
36 #include "mem/ruby/network/garnet2.0/InputUnit.hh"
37 #include "mem/ruby/network/garnet2.0/OutputUnit.hh"
38 #include "mem/ruby/network/garnet2.0/Router.hh"
39
40 SwitchAllocator::SwitchAllocator(Router *router)
41 : Consumer(router)
42 {
43 m_router = router;
44 m_num_vcs = m_router->get_num_vcs();
45 m_vc_per_vnet = m_router->get_vc_per_vnet();
46
47 m_input_arbiter_activity = 0;
48 m_output_arbiter_activity = 0;
49 }
50
51 void
52 SwitchAllocator::init()
53 {
54 m_num_inports = m_router->get_num_inports();
55 m_num_outports = m_router->get_num_outports();
56 m_round_robin_inport.resize(m_num_outports);
57 m_round_robin_invc.resize(m_num_inports);
58 m_port_requests.resize(m_num_outports);
59 m_vc_winners.resize(m_num_outports);
60
61 for (int i = 0; i < m_num_inports; i++) {
62 m_round_robin_invc[i] = 0;
63 }
64
65 for (int i = 0; i < m_num_outports; i++) {
66 m_port_requests[i].resize(m_num_inports);
67 m_vc_winners[i].resize(m_num_inports);
68
69 m_round_robin_inport[i] = 0;
70
71 for (int j = 0; j < m_num_inports; j++) {
72 m_port_requests[i][j] = false; // [outport][inport]
73 }
74 }
75 }
76
77 /*
78 * The wakeup function of the SwitchAllocator performs a 2-stage
79 * seperable switch allocation. At the end of the 2nd stage, a free
80 * output VC is assigned to the winning flits of each output port.
81 * There is no separate VCAllocator stage like the one in garnet1.0.
82 * At the end of this function, the router is rescheduled to wakeup
83 * next cycle for peforming SA for any flits ready next cycle.
84 */
85
86 void
87 SwitchAllocator::wakeup()
88 {
89 arbitrate_inports(); // First stage of allocation
90 arbitrate_outports(); // Second stage of allocation
91
92 clear_request_vector();
93 check_for_wakeup();
94 }
95
96 /*
97 * SA-I (or SA-i) loops through all input VCs at every input port,
98 * and selects one in a round robin manner.
99 * - For HEAD/HEAD_TAIL flits only selects an input VC whose output port
100 * has at least one free output VC.
101 * - For BODY/TAIL flits, only selects an input VC that has credits
102 * in its output VC.
103 * Places a request for the output port from this input VC.
104 */
105
106 void
107 SwitchAllocator::arbitrate_inports()
108 {
109 // Select a VC from each input in a round robin manner
110 // Independent arbiter at each input port
111 for (int inport = 0; inport < m_num_inports; inport++) {
112 int invc = m_round_robin_invc[inport];
113
114 for (int invc_iter = 0; invc_iter < m_num_vcs; invc_iter++) {
115 auto input_unit = m_router->getInputUnit(inport);
116
117 if (input_unit->need_stage(invc, SA_, curTick())) {
118 // This flit is in SA stage
119
120 int outport = input_unit->get_outport(invc);
121 int outvc = input_unit->get_outvc(invc);
122
123 // check if the flit in this InputVC is allowed to be sent
124 // send_allowed conditions described in that function.
125 bool make_request =
126 send_allowed(inport, invc, outport, outvc);
127
128 if (make_request) {
129 m_input_arbiter_activity++;
130 m_port_requests[outport][inport] = true;
131 m_vc_winners[outport][inport]= invc;
132
133 // Update Round Robin pointer to the next VC
134 m_round_robin_invc[inport] = invc + 1;
135 if (m_round_robin_invc[inport] >= m_num_vcs)
136 m_round_robin_invc[inport] = 0;
137
138 break; // got one vc winner for this port
139 }
140 }
141
142 invc++;
143 if (invc >= m_num_vcs)
144 invc = 0;
145 }
146 }
147 }
148
149 /*
150 * SA-II (or SA-o) loops through all output ports,
151 * and selects one input VC (that placed a request during SA-I)
152 * as the winner for this output port in a round robin manner.
153 * - For HEAD/HEAD_TAIL flits, performs simplified outvc allocation.
154 * (i.e., select a free VC from the output port).
155 * - For BODY/TAIL flits, decrement a credit in the output vc.
156 * The winning flit is read out from the input VC and sent to the
157 * CrossbarSwitch.
158 * An increment_credit signal is sent from the InputUnit
159 * to the upstream router. For HEAD_TAIL/TAIL flits, is_free_signal in the
160 * credit is set to true.
161 */
162
163 void
164 SwitchAllocator::arbitrate_outports()
165 {
166 // Now there are a set of input vc requests for output vcs.
167 // Again do round robin arbitration on these requests
168 // Independent arbiter at each output port
169 for (int outport = 0; outport < m_num_outports; outport++) {
170 int inport = m_round_robin_inport[outport];
171
172 for (int inport_iter = 0; inport_iter < m_num_inports;
173 inport_iter++) {
174
175 // inport has a request this cycle for outport
176 if (m_port_requests[outport][inport]) {
177 auto output_unit = m_router->getOutputUnit(outport);
178 auto input_unit = m_router->getInputUnit(inport);
179
180 // grant this outport to this inport
181 int invc = m_vc_winners[outport][inport];
182
183 int outvc = input_unit->get_outvc(invc);
184 if (outvc == -1) {
185 // VC Allocation - select any free VC from outport
186 outvc = vc_allocate(outport, inport, invc);
187 }
188
189 // remove flit from Input VC
190 flit *t_flit = input_unit->getTopFlit(invc);
191
192 DPRINTF(RubyNetwork, "SwitchAllocator at Router %d "
193 "granted outvc %d at outport %d "
194 "to invc %d at inport %d to flit %s at "
195 "cycle: %lld\n",
196 m_router->get_id(), outvc,
197 m_router->getPortDirectionName(
198 output_unit->get_direction()),
199 invc,
200 m_router->getPortDirectionName(
201 input_unit->get_direction()),
202 *t_flit,
203 m_router->curCycle());
204
205
206 // Update outport field in the flit since this is
207 // used by CrossbarSwitch code to send it out of
208 // correct outport.
209 // Note: post route compute in InputUnit,
210 // outport is updated in VC, but not in flit
211 t_flit->set_outport(outport);
212
213 // set outvc (i.e., invc for next hop) in flit
214 // (This was updated in VC by vc_allocate, but not in flit)
215 t_flit->set_vc(outvc);
216
217 // decrement credit in outvc
218 output_unit->decrement_credit(outvc);
219
220 // flit ready for Switch Traversal
221 t_flit->advance_stage(ST_, curTick());
222 m_router->grant_switch(inport, t_flit);
223 m_output_arbiter_activity++;
224
225 if ((t_flit->get_type() == TAIL_) ||
226 t_flit->get_type() == HEAD_TAIL_) {
227
228 // This Input VC should now be empty
229 assert(!(input_unit->isReady(invc, curTick())));
230
231 // Free this VC
232 input_unit->set_vc_idle(invc, curTick());
233
234 // Send a credit back
235 // along with the information that this VC is now idle
236 input_unit->increment_credit(invc, true, curTick());
237 } else {
238 // Send a credit back
239 // but do not indicate that the VC is idle
240 input_unit->increment_credit(invc, false, curTick());
241 }
242
243 // remove this request
244 m_port_requests[outport][inport] = false;
245
246 // Update Round Robin pointer
247 m_round_robin_inport[outport] = inport + 1;
248 if (m_round_robin_inport[outport] >= m_num_inports)
249 m_round_robin_inport[outport] = 0;
250
251 break; // got a input winner for this outport
252 }
253
254 inport++;
255 if (inport >= m_num_inports)
256 inport = 0;
257 }
258 }
259 }
260
261 /*
262 * A flit can be sent only if
263 * (1) there is at least one free output VC at the
264 * output port (for HEAD/HEAD_TAIL),
265 * or
266 * (2) if there is at least one credit (i.e., buffer slot)
267 * within the VC for BODY/TAIL flits of multi-flit packets.
268 * and
269 * (3) pt-to-pt ordering is not violated in ordered vnets, i.e.,
270 * there should be no other flit in this input port
271 * within an ordered vnet
272 * that arrived before this flit and is requesting the same output port.
273 */
274
275 bool
276 SwitchAllocator::send_allowed(int inport, int invc, int outport, int outvc)
277 {
278 // Check if outvc needed
279 // Check if credit needed (for multi-flit packet)
280 // Check if ordering violated (in ordered vnet)
281
282 int vnet = get_vnet(invc);
283 bool has_outvc = (outvc != -1);
284 bool has_credit = false;
285
286 auto output_unit = m_router->getOutputUnit(outport);
287 if (!has_outvc) {
288
289 // needs outvc
290 // this is only true for HEAD and HEAD_TAIL flits.
291
292 if (output_unit->has_free_vc(vnet)) {
293
294 has_outvc = true;
295
296 // each VC has at least one buffer,
297 // so no need for additional credit check
298 has_credit = true;
299 }
300 } else {
301 has_credit = output_unit->has_credit(outvc);
302 }
303
304 // cannot send if no outvc or no credit.
305 if (!has_outvc || !has_credit)
306 return false;
307
308
309 // protocol ordering check
310 if ((m_router->get_net_ptr())->isVNetOrdered(vnet)) {
311 auto input_unit = m_router->getInputUnit(inport);
312
313 // enqueue time of this flit
314 Tick t_enqueue_time = input_unit->get_enqueue_time(invc);
315
316 // check if any other flit is ready for SA and for same output port
317 // and was enqueued before this flit
318 int vc_base = vnet*m_vc_per_vnet;
319 for (int vc_offset = 0; vc_offset < m_vc_per_vnet; vc_offset++) {
320 int temp_vc = vc_base + vc_offset;
321 if (input_unit->need_stage(temp_vc, SA_, curTick()) &&
322 (input_unit->get_outport(temp_vc) == outport) &&
323 (input_unit->get_enqueue_time(temp_vc) < t_enqueue_time)) {
324 return false;
325 }
326 }
327 }
328
329 return true;
330 }
331
332 // Assign a free VC to the winner of the output port.
333 int
334 SwitchAllocator::vc_allocate(int outport, int inport, int invc)
335 {
336 // Select a free VC from the output port
337 int outvc =
338 m_router->getOutputUnit(outport)->select_free_vc(get_vnet(invc));
339
340 // has to get a valid VC since it checked before performing SA
341 assert(outvc != -1);
342 m_router->getInputUnit(inport)->grant_outvc(invc, outvc);
343 return outvc;
344 }
345
346 // Wakeup the router next cycle to perform SA again
347 // if there are flits ready.
348 void
349 SwitchAllocator::check_for_wakeup()
350 {
351 Tick nextCycle = m_router->clockEdge(Cycles(1));
352
353 for (int i = 0; i < m_num_inports; i++) {
354 for (int j = 0; j < m_num_vcs; j++) {
355 if (m_router->getInputUnit(i)->need_stage(j, SA_, nextCycle)) {
356 m_router->schedule_wakeup(Cycles(1));
357 return;
358 }
359 }
360 }
361 }
362
363 int
364 SwitchAllocator::get_vnet(int invc)
365 {
366 int vnet = invc/m_vc_per_vnet;
367 assert(vnet < m_router->get_num_vnets());
368 return vnet;
369 }
370
371
372 // Clear the request vector within the allocator at end of SA-II.
373 // Was populated by SA-I.
374 void
375 SwitchAllocator::clear_request_vector()
376 {
377 for (int i = 0; i < m_num_outports; i++) {
378 for (int j = 0; j < m_num_inports; j++) {
379 m_port_requests[i][j] = false;
380 }
381 }
382 }
383
384 void
385 SwitchAllocator::resetStats()
386 {
387 m_input_arbiter_activity = 0;
388 m_output_arbiter_activity = 0;
389 }