gpu-compute,mem-ruby: Refactor GPU coalescer
[gem5.git] / src / base / cp_annotate.hh
1 /*
2 * Copyright (c) 2014 ARM Limited
3 * All rights reserved.
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder. You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Copyright (c) 2006-2009 The Regents of The University of Michigan
15 * All rights reserved.
16 *
17 * Redistribution and use in source and binary forms, with or without
18 * modification, are permitted provided that the following conditions are
19 * met: redistributions of source code must retain the above copyright
20 * notice, this list of conditions and the following disclaimer;
21 * redistributions in binary form must reproduce the above copyright
22 * notice, this list of conditions and the following disclaimer in the
23 * documentation and/or other materials provided with the distribution;
24 * neither the name of the copyright holders nor the names of its
25 * contributors may be used to endorse or promote products derived from
26 * this software without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
29 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
31 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
32 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
33 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
34 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
35 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
36 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
37 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
38 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39 */
40
41 #ifndef __BASE__CP_ANNOTATE_HH__
42 #define __BASE__CP_ANNOTATE_HH__
43
44 #include <list>
45 #include <map>
46 #include <memory>
47 #include <string>
48 #include <unordered_map>
49 #include <vector>
50
51 #include "base/loader/symtab.hh"
52 #include "base/trace.hh"
53 #include "base/types.hh"
54 #include "debug/AnnotateQ.hh"
55 #include "config/cp_annotate.hh"
56 #include "config/the_isa.hh"
57 #include "sim/serialize.hh"
58 #include "sim/system.hh"
59
60 #if CP_ANNOTATE
61 #include "params/CPA.hh"
62 #endif
63
64 class System;
65 class ThreadContext;
66
67
68 #if !CP_ANNOTATE
69 class CPA
70 {
71 public:
72 enum flags {
73 FL_NONE = 0x00,
74 FL_HW = 0x01,
75 FL_BAD = 0x02,
76 FL_QOPP = 0x04,
77 FL_WAIT = 0x08,
78 FL_LINK = 0x10,
79 FL_RESET = 0x20
80 };
81
82 static CPA *cpa() { return NULL; }
83 static bool available() { return false; }
84 bool enabled() { return false; }
85 void swSmBegin(ThreadContext *tc, Addr sm_string,
86 int32_t sm_id, int32_t flags) { return; }
87 void swSmEnd(ThreadContext *tc, Addr sm_string) { return; }
88 void swExplictBegin(ThreadContext *tc, int32_t flags,
89 Addr st_string) { return; }
90 void swAutoBegin(ThreadContext *tc, Addr next_pc) { return; }
91 void swEnd(ThreadContext *tc) { return; }
92 void swQ(ThreadContext *tc, Addr id, Addr q_string,
93 int32_t count) { return; }
94 void swDq(ThreadContext *tc, Addr id, Addr q_string,
95 int32_t count) { return; }
96 void swPq(ThreadContext *tc, Addr id, Addr q_string,
97 int32_t count) { return; }
98 void swRq(ThreadContext *tc, Addr id, Addr q_string,
99 int32_t count) { return; }
100 void swWf(ThreadContext *tc, Addr id, Addr q_string,
101 Addr sm_string, int32_t count) { return; }
102 void swWe(ThreadContext *tc, Addr id, Addr q_string,
103 Addr sm_string, int32_t count) { return; }
104 void swSq(ThreadContext *tc, Addr id, Addr q_string,
105 int32_t size, int32_t flags) { return; }
106 void swAq(ThreadContext *tc, Addr id, Addr q_string,
107 int32_t size) { return; }
108 void swLink(ThreadContext *tc, Addr lsm_string,
109 Addr lsm_id, Addr sm_string) { return; }
110 void swIdentify(ThreadContext *tc, Addr smi_string) { return; }
111 uint64_t swGetId(ThreadContext *tc) { return 0; }
112 void swSyscallLink(ThreadContext *tc, Addr lsm_string,
113 Addr sm_string) { return; }
114 void hwBegin(flags f, System *sys, uint64_t frame, std::string sm,
115 std::string st) { return; }
116 void hwQ(flags f, System *sys, uint64_t frame, std::string sm,
117 std::string q, uint64_t qid, System *q_sys = NULL,
118 int32_t count = 1) { return; }
119 void hwDq(flags f, System *sys, uint64_t frame, std::string sm,
120 std::string q, uint64_t qid, System *q_sys = NULL,
121 int32_t count = 1) { return; }
122 void hwPq(flags f, System *sys, uint64_t frame, std::string sm,
123 std::string q, uint64_t qid, System *q_sys = NULL,
124 int32_t count = 1) { return; }
125 void hwRq(flags f, System *sys, uint64_t frame, std::string sm,
126 std::string q, uint64_t qid, System *q_sys = NULL,
127 int32_t count = 1) { return; }
128 void hwWf(flags f, System *sys, uint64_t frame, std::string sm,
129 std::string q, uint64_t qid, System *q_sys = NULL,
130 int32_t count = 1) { return; }
131 void hwWe(flags f, System *sys, uint64_t frame, std::string sm,
132 std::string q, uint64_t qid, System *q_sys = NULL,
133 int32_t count = 1) { return; }
134 };
135 #else
136
/**
 * Provide a hash function for the CPA Id type, a (name, numeric id) pair
 * used as an unordered_map key.
 *
 * Both halves of the pair take part in the hash. Hashing the string alone
 * would put every key that shares a name into the same bucket, whatever
 * its numeric id.
 *
 * NOTE(review): the C++ standard only sanctions specializing std templates
 * for program-defined types; this pair is built purely from standard types.
 * A dedicated hasher passed to the maps would be stricter, but that would
 * change the map typedefs that rely on this specialization.
 */
namespace std {
template <>
struct hash<std::pair<std::string, uint64_t> >
{

    size_t
    operator()(const std::pair<std::string, uint64_t>& x) const
    {
        // Conventional "hash combine" mixing step: fold the id's hash into
        // the string's hash, with shifts so nearby ids spread out.
        size_t seed = hash<std::string>()(x.first);
        seed ^= hash<uint64_t>()(x.second) + 0x9e3779b9 +
                (seed << 6) + (seed >> 2);
        return seed;
    }

};
}
153
154 class CPA : SimObject
155 {
156 public:
157 typedef CPAParams Params;
158
159 /** The known operations that are written to the annotation output file. */
160 enum ops {
161 OP_BEGIN = 0x01,
162 OP_WAIT_EMPTY = 0x02,
163 OP_WAIT_FULL = 0x03,
164 OP_QUEUE = 0x04,
165 OP_DEQUEUE = 0x05,
166 OP_SIZE_QUEUE = 0x08,
167 OP_PEEK = 0x09,
168 OP_LINK = 0x0A,
169 OP_IDENT = 0x0B,
170 OP_RESERVE = 0x0C
171 };
172
173 /** Flags for the various options.*/
174 enum flags {
175 /* no flags */
176 FL_NONE = 0x00,
177 /* operation was done on hardware */
178 FL_HW = 0x01,
179 /* operation should cause a warning when encountered */
180 FL_BAD = 0x02,
181 /* Queue like a stack, not a queue */
182 FL_QOPP = 0x04,
183 /* Mark HW state as waiting for some non-resource constraint
184 * (e.g. wait because SM only starts after 10 items are queued) */
185 FL_WAIT = 0x08,
186 /* operation is linking to another state machine */
187 FL_LINK = 0x10,
188 /* queue should be completely cleared/reset before executing this
189 * operation */
190 FL_RESET = 0x20
191 };
192
193
194
195 protected:
196 const Params *
197 params() const
198 {
199 return dynamic_cast<const Params *>(_params);
200 }
201
202 /* struct that is written to the annotation output file */
203 struct AnnotateData : public Serializable {
204
205 Tick time;
206 uint32_t data;
207 uint32_t orig_data;
208 uint16_t sm;
209 uint16_t stq;
210 uint8_t op;
211 uint8_t flag;
212 uint8_t cpu;
213 bool dump;
214
215 void serialize(CheckpointOut &cp) const override;
216 void unserialize(CheckpointIn &cp) override;
217 };
218
219 typedef std::shared_ptr<AnnotateData> AnnDataPtr;
220
221 /* header for the annotation file */
222 struct AnnotateHeader {
223 uint64_t version;
224 uint64_t num_recs;
225 uint64_t key_off;
226 uint64_t idx_off;
227 uint32_t key_len;
228 uint32_t idx_len;
229 };
230
231 AnnotateHeader ah;
232
233 std::vector<uint64_t> annotateIdx;
234
235 // number of state machines encountered in the simulation
236 int numSm;
237 // number of states encountered in the simulation
238 int numSmt;
239 // number of states/queues for a given state machine/system respectively
240 std::vector<int> numSt, numQ;
241 // number of systems in the simulation
242 int numSys;
243 // number of queues in the state machine
244 int numQs;
245 // maximum connection id assigned so far
246 uint64_t conId;
247
248 // Convert state strings into state ids
249 typedef std::unordered_map<std::string, int> SCache;
250 typedef std::vector<SCache> StCache;
251
252 // Convert sm and queue name,id into queue id
253 typedef std::pair<std::string, uint64_t> Id;
254 typedef std::unordered_map<Id, int> IdHCache;
255 typedef std::vector<IdHCache> IdCache;
256
257 // Hold mapping of sm and queues to output python
258 typedef std::vector<std::pair<int, Id> > IdMap;
259
260 // System pointer to name,id
261 typedef std::map<System*, std::pair<std::string, int> > NameCache;
262
263 // array of systems each of which is a stack of running sm
264 typedef std::pair<int, uint64_t> StackId;
265 typedef std::map<StackId, std::vector<int> > SmStack;
266
267 // map of each context and if it's currently in explict state mode
268 // states are not automatically updated until it leaves
269 typedef std::map<StackId, bool> SwExpl;
270
271 typedef std::map<int,int> IMap;
272 // List of annotate records have not been written/completed yet
273 typedef std::list<AnnDataPtr> AnnotateList;
274
275 // Maintain link state information
276 typedef std::map<int, int> LinkMap;
277
278 // SC Links
279 typedef std::unordered_map<Id, AnnDataPtr> ScHCache;
280 typedef std::vector<ScHCache> ScCache;
281
282
283 AnnotateList data;
284
285 // vector indexed by queueid to find current number of elements and bytes
286 std::vector<int> qSize;
287 std::vector<int32_t> qBytes;
288
289
290 // Turn state machine string into state machine id (small int)
291 // Used for outputting key to convert id back into string
292 SCache smtCache;
293 // Turn state machine id, state name into state id (small int)
294 StCache stCache;
295 // turn system, queue, and queue identify into qid (small int)
296 // turn system, state, and context into state machine id (small int)
297 IdCache qCache, smCache;
298 //Link state machines accross system calls
299 ScCache scLinks;
300 // System pointer to name,id
301 NameCache nameCache;
302 // Stack of state machines currently nested (should unwind correctly)
303 SmStack smStack;
304 // Map of currently outstanding links
305 LinkMap lnMap;
306 // If the state machine is currently exculding automatic changes
307 SwExpl swExpl;
308 // Last state that a given state machine was in
309 IMap lastState;
310 // Hold mapping of sm and queues to output python
311 IdMap smMap, qMap;
312 // Items still in queue, used for sanity checking
313 std::vector<AnnotateList> qData;
314
315 void doDq(System *sys, int flags, int cpu, int sm, std::string q, int qi,
316 int count);
317 void doQ(System *sys, int flags, int cpu, int sm, std::string q, int qi,
318 int count);
319
320 void doSwSmEnd(System *sys, int cpuid, std::string sm, uint64_t frame);
321
322 // Turn a system id, state machine string, state machine id into a small int
323 // for annotation output
324 int
325 getSm(int sysi, std::string si, uint64_t id)
326 {
327 int smi;
328 Id smid = Id(si, id);
329
330 smi = smCache[sysi-1][smid];
331 if (smi == 0) {
332 smCache[sysi-1][smid] = smi = ++numSm;
333 assert(smi < 65535);
334 smMap.push_back(std::make_pair(sysi, smid));
335 }
336 return smi;
337 }
338
339 // Turn a state machine string, state string into a small int
340 // for annotation output
341 int
342 getSt(std::string sm, std::string s)
343 {
344 int sti, smi;
345
346 smi = smtCache[sm];
347 if (smi == 0)
348 smi = smtCache[sm] = ++numSmt;
349
350 while (stCache.size() < smi) {
351 //stCache.resize(sm);
352 stCache.push_back(SCache());
353 numSt.push_back(0);
354 }
355 //assert(stCache.size() == sm);
356 //assert(numSt.size() == sm);
357 sti = stCache[smi-1][s];
358 if (sti == 0)
359 stCache[smi-1][s] = sti = ++numSt[smi-1];
360 return sti;
361 }
362
363 // Turn state machine pointer into a smal int for annotation output
364 int
365 getSys(System *s)
366 {
367 NameCache::iterator i = nameCache.find(s);
368 if (i == nameCache.end()) {
369 nameCache[s] = std::make_pair(s->name(), ++numSys);
370 i = nameCache.find(s);
371 // might need to put smstackid into map here, but perhaps not
372 //smStack.push_back(std::vector<int>());
373 //swExpl.push_back(false);
374 numQ.push_back(0);
375 qCache.push_back(IdHCache());
376 smCache.push_back(IdHCache());
377 scLinks.push_back(ScHCache());
378 }
379 return i->second.second;
380 }
381
382 // Turn queue name, and queue context into small int for
383 // annotation output
384 int
385 getQ(int sys, std::string q, uint64_t id)
386 {
387 int qi;
388 Id qid = Id(q, id);
389
390 qi = qCache[sys-1][qid];
391 if (qi == 0) {
392 qi = qCache[sys-1][qid] = ++numQs;
393 assert(qi < 65535);
394 qSize.push_back(0);
395 qBytes.push_back(0);
396 qData.push_back(AnnotateList());
397 numQ[sys-1]++;
398 qMap.push_back(std::make_pair(sys, qid));
399 }
400 return qi;
401 }
402
403 void swBegin(System *sys, int cpuid, std::string st, uint64_t frame,
404 bool expl = false, int flags = FL_NONE);
405
406 AnnDataPtr add(int t, int f, int c, int sm, int stq, int32_t data=0);
407
408 std::ostream *osbin;
409
410 bool _enabled;
411
412 /** Only allow one CPA object in a system. It doesn't make sense to have
413 * more that one per simulation because if a part of the system was
414 * important it would have annotations and queues, and with more than one
415 * object none of the sanity checking for queues will work. */
416 static bool exists;
417 static CPA *_cpa;
418
419
420 std::map<std::string, Loader::SymbolTable*> userApp;
421
422 public:
423 static CPA *cpa() { return _cpa; }
424 void swSmBegin(ThreadContext *tc);
425 void swSmEnd(ThreadContext *tc);
426 void swExplictBegin(ThreadContext *tc);
427 void swAutoBegin(ThreadContext *tc, Addr next_pc);
428 void swEnd(ThreadContext *tc);
429 void swQ(ThreadContext *tc);
430 void swDq(ThreadContext *tc);
431 void swPq(ThreadContext *tc);
432 void swRq(ThreadContext *tc);
433 void swWf(ThreadContext *tc);
434 void swWe(ThreadContext *tc);
435 void swSq(ThreadContext *tc);
436 void swAq(ThreadContext *tc);
437 void swLink(ThreadContext *tc);
438 void swIdentify(ThreadContext *tc);
439 uint64_t swGetId(ThreadContext *tc);
440 void swSyscallLink(ThreadContext *tc);
441
442 inline void hwBegin(flags f, System *sys, uint64_t frame, std::string sm,
443 std::string st)
444 {
445 if (!enabled())
446 return;
447
448 int sysi = getSys(sys);
449 int smi = getSm(sysi, sm, frame);
450 add(OP_BEGIN, FL_HW | f, 0, smi, getSt(sm, st));
451 if (f & FL_BAD)
452 warn("BAD state encountered: at cycle %d: %s\n", curTick(), st);
453 }
454
455 inline void hwQ(flags f, System *sys, uint64_t frame, std::string sm,
456 std::string q, uint64_t qid, System *q_sys = NULL, int32_t count = 1)
457 {
458 if (!enabled())
459 return;
460
461 int sysi = getSys(sys);
462 int qi = getQ(q_sys ? getSys(q_sys) : sysi, q, qid);
463 DPRINTFS(AnnotateQ, sys,
464 "hwQ: %s[%#x] cur size %d %d bytes: %d adding: %d\n",
465 q, qid, qSize[qi-1], qData[qi-1].size(), qBytes[qi-1], count);
466 doQ(sys, FL_HW | f, 0, getSm(sysi, sm, frame), q, qi, count);
467
468 }
469
470 inline void hwDq(flags f, System *sys, uint64_t frame, std::string sm,
471 std::string q, uint64_t qid, System *q_sys = NULL, int32_t count = 1)
472 {
473 if (!enabled())
474 return;
475
476 int sysi = getSys(sys);
477 int qi = getQ(q_sys ? getSys(q_sys) : sysi, q, qid);
478 DPRINTFS(AnnotateQ, sys,
479 "hwDQ: %s[%#x] cur size %d %d bytes: %d removing: %d\n",
480 q, qid, qSize[qi-1], qData[qi-1].size(), qBytes[qi-1], count);
481 doDq(sys, FL_HW | f, 0, getSm(sysi,sm, frame), q, qi, count);
482 }
483
484 inline void hwPq(flags f, System *sys, uint64_t frame, std::string sm,
485 std::string q, uint64_t qid, System *q_sys = NULL, int32_t count = 1)
486 {
487 if (!enabled())
488 return;
489
490 int sysi = getSys(sys);
491 int qi = getQ(q_sys ? getSys(q_sys) : sysi, q, qid);
492 DPRINTFS(AnnotateQ, sys,
493 "hwPQ: %s[%#x] cur size %d %d bytes: %d peeking: %d\n",
494 q, qid, qSize[qi-1], qData[qi-1].size(), qBytes[qi-1], count);
495 add(OP_PEEK, FL_HW | f, 0, getSm(sysi, sm, frame), qi, count);
496 }
497
498 inline void hwRq(flags f, System *sys, uint64_t frame, std::string sm,
499 std::string q, uint64_t qid, System *q_sys = NULL, int32_t count = 1)
500 {
501 if (!enabled())
502 return;
503
504 int sysi = getSys(sys);
505 int qi = getQ(q_sys ? getSys(q_sys) : sysi, q, qid);
506 DPRINTFS(AnnotateQ, sys,
507 "hwRQ: %s[%#x] cur size %d %d bytes: %d reserving: %d\n",
508 q, qid, qSize[qi-1], qData[qi-1].size(), qBytes[qi-1], count);
509 add(OP_RESERVE, FL_HW | f, 0, getSm(sysi, sm, frame), qi, count);
510 }
511
512 inline void hwWf(flags f, System *sys, uint64_t frame, std::string sm,
513 std::string q, uint64_t qid, System *q_sys = NULL, int32_t count = 1)
514 {
515 if (!enabled())
516 return;
517
518 int sysi = getSys(sys);
519 int qi = getQ(q_sys ? getSys(q_sys) : sysi, q, qid);
520 add(OP_WAIT_FULL, FL_HW | f, 0, getSm(sysi, sm, frame), qi, count);
521 }
522
523 inline void hwWe(flags f, System *sys, uint64_t frame, std::string sm,
524 std::string q, uint64_t qid, System *q_sys = NULL, int32_t count = 1)
525 {
526 if (!enabled())
527 return;
528
529 int sysi = getSys(sys);
530 int qi = getQ(q_sys ? getSys(q_sys) : sysi, q, qid);
531 add(OP_WAIT_EMPTY, FL_HW | f, 0, getSm(sysi, sm, frame), qi, count);
532 }
533
534 public:
535 CPA(Params *p);
536 void startup();
537
538 uint64_t getFrame(ThreadContext *tc);
539
540 static bool available() { return true; }
541
542 bool
543 enabled()
544 {
545 if (!this)
546 return false;
547 return _enabled;
548 }
549
550 void dump(bool all);
551 void dumpKey();
552
553 void serialize(CheckpointOut &cp) const override;
554 void unserialize(CheckpointIn &cp) override;
555 };
556 #endif // !CP_ANNOTATE
557
558 #endif //__BASE__CP_ANNOTATE_HH__
559