2 * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 This file has been modified by Kevin Moore and Dan Nussbaum of the
31 Scalable Systems Research Group at Sun Microsystems Laboratories
32 (http://research.sun.com/scalable/) to support the Adaptive
33 Transactional Memory Test Platform (ATMTP).
35 Please send email to atmtp-interest@sun.com with feedback, questions, or
36 to request future announcements about ATMTP.
38 ----------------------------------------------------------------------
40 File modification date: 2008-02-23
42 ----------------------------------------------------------------------
57 #include "mem/ruby/libruby.hh"
59 #include "mem/ruby/common/Global.hh"
60 #include "mem/protocol/GenericMachineType.hh"
61 #include "mem/ruby/common/Histogram.hh"
62 #include "mem/ruby/common/Consumer.hh"
63 #include "mem/protocol/AccessModeType.hh"
64 #include "mem/protocol/AccessType.hh"
65 #include "mem/ruby/system/NodeID.hh"
66 #include "mem/ruby/system/MachineID.hh"
67 #include "mem/protocol/PrefetchBit.hh"
68 #include "mem/ruby/common/Address.hh"
69 #include "mem/ruby/common/Set.hh"
70 #include "mem/protocol/CacheRequestType.hh"
71 #include "mem/protocol/GenericRequestType.hh"
72 #include "mem/ruby/system/MemoryControl.hh"
// Forward declarations: this header only uses these types through pointers
// (AddressProfiler*, Map<...>*), so their full definitions are not needed here.
76 class AddressProfiler;
78 template <class KEY_TYPE, class VALUE_TYPE> class Map;
// Plain aggregate of profiling counters for one memory controller.
// Profiler stores pointers to these records in m_memory_control_profilers,
// a map keyed by string.
// The counter names parallel the Profiler::profileMem* methods declared in
// this file; presumably each method updates the like-named field, but the
// implementation is not in this header -- confirm before relying on it.
// NOTE(review): the embedded line numbers skip from 96 to 99 and no closing
// "};" is visible in this listing, so members may exist that are not shown.
80 struct memory_control_profiler {
// Scalar counters, all long long int.
81 long long int m_memReq;
82 long long int m_memBankBusy;
83 long long int m_memBusBusy;
84 long long int m_memTfawBusy;
85 long long int m_memReadWriteBusy;
86 long long int m_memDataBusBusy;
87 long long int m_memRefresh;
88 long long int m_memRead;
89 long long int m_memWrite;
// The four counters below have profileMem* counterparts that take an extra
// int `cycles` argument; presumably they accumulate cycles -- TODO confirm.
90 long long int m_memWaitCycles;
91 long long int m_memInputQ;
92 long long int m_memBankQ;
93 long long int m_memArbWait;
94 long long int m_memRandBusy;
95 long long int m_memNotOld;
// One count per entry; presumably indexed by the `bank` argument of
// Profiler::profileMemReq -- TODO confirm against the implementation.
96 Vector<long long int> m_memBankCount;
// Configuration value rather than a counter, judging by the name; how it is
// set and used is not visible here.
99 int m_dimms_per_channel;
// Profiler: statistics-collection class, publicly derived from Consumer.
// It declares profiling hooks (profile*/count*/*UsageSample), printing
// routines, and the histograms and counters those hooks feed. Only the small
// inline hooks are defined here; everything else is declared only.
// NOTE(review): the embedded line numbers show gaps (e.g. 104-105, 180-182,
// 202-204, 300-303), so access specifiers, some members and the closing "};"
// are not visible in this listing.
103 class Profiler : public Consumer {
// Constructs a Profiler from a name string.
106 Profiler(const string & name);
// Initialization; argv is taken by const reference, memory_control_names by
// value. What the arguments mean is defined by the implementation.
108 void init(const vector<string> & argv, vector<string> memory_control_names);
// Periodic-statistics configuration (see m_periodic_output_file_ptr and
// m_stats_period below).
116 void setPeriodicStatsFile(const string& filename);
117 void setPeriodicStatsInterval(integer_t period);
// Printing routines. short_stats defaults to false; printShortStats is a
// convenience wrapper that calls printStats(out, true).
119 void printStats(ostream& out, bool short_stats=false);
120 void printShortStats(ostream& out) { printStats(out, true); }
121 void printTraceStats(ostream& out) const;
123 void printConfig(ostream& out) const;
124 void printResourceUsage(ostream& out) const;
// Accessors returning the stored AddressProfiler pointers (non-owning as far
// as this header shows; ownership is decided by the implementation).
126 AddressProfiler* getAddressProfiler() { return m_address_profiler_ptr; }
127 AddressProfiler* getInstructionProfiler() { return m_inst_profiler_ptr; }
129 void addAddressTraceSample(const CacheMsg& msg, NodeID id);
// Request and sharing profiling hooks.
131 void profileRequest(const string& requestStr);
132 void profileSharing(const Address& addr, AccessType type, NodeID requestor, const Set& sharers, const Set& owner);
134 void profileMulticastRetry(const Address& addr, int count);
136 void profileFilterAction(int action);
138 void profileConflictingRequests(const Address& addr);
// Each of the next three hooks adds one sample to the like-named histogram.
139 void profileOutstandingRequest(int outstanding) { m_outstanding_requests.add(outstanding); }
140 void profileOutstandingPersistentRequest(int outstanding) { m_outstanding_persistent_requests.add(outstanding); }
141 void profileAverageLatencyEstimate(int latency) { m_average_latency_estimate.add(latency); }
// Increment the plain int counters m_num_BA_unicasts / m_num_BA_broadcasts.
143 void countBAUnicast() { m_num_BA_unicasts++; }
144 void countBABroadcast() { m_num_BA_broadcasts++; }
146 void recordPrediction(bool wasGood, bool wasPredicted);
// Per-CPU transaction hooks; presumably these update
// m_perProcStartTransaction / m_perProcEndTransaction -- TODO confirm.
148 void startTransaction(int cpu);
149 void endTransaction(int cpu);
150 void profilePFWait(Time waitTime);
152 void controllerBusy(MachineID machID);
// Latency hooks.
154 void missLatency(Time t, RubyRequestType type);
155 void swPrefetchLatency(Time t, CacheRequestType type, GenericMachineType respondingMach);
// Usage-sample hooks: each adds `num` to the corresponding histogram.
156 void stopTableUsageSample(int num) { m_stopTableProfile.add(num); }
157 void L1tbeUsageSample(int num) { m_L1tbeProfile.add(num); }
158 void L2tbeUsageSample(int num) { m_L2tbeProfile.add(num); }
159 void sequencerRequests(int num) { m_sequencer_requests.add(num); }
// Records two samples per call: `size` into m_store_buffer_size and `blocks`
// into m_store_buffer_blocks.
160 void storeBuffer(int size, int blocks) { m_store_buffer_size.add(size); m_store_buffer_blocks.add(blocks);}
// Mask-prediction hooks; each takes the predicted Set by const reference.
162 void profileGetXMaskPrediction(const Set& pred_set);
163 void profileGetSMaskPrediction(const Set& pred_set);
164 void profileTrainingMask(const Set& pred_set);
// Records a state-machine transition described by component, version,
// address, state, event, next state and a free-form note.
165 void profileTransition(const string& component, NodeID version, Address addr,
166 const string& state, const string& event,
167 const string& next_state, const string& note);
168 void profileMsgDelay(int virtualNetwork, int delayCycles);
170 void print(ostream& out) const;
172 int64 getTotalInstructionsExecuted() const;
173 int64 getTotalTransactionsExecuted() const;
// Address-watch support (see m_watch_address_list_ptr below).
175 void rubyWatch(int proc);
176 bool watchAddress(Address addr);
178 // return Ruby's start time
// NOTE(review): the body of this inline function is not visible in this
// listing (embedded numbers skip 180-182).
179 Time getRubyStartTime(){
183 // added for MemoryControl:
// Every hook takes the memory controller's name by value; presumably the name
// selects an entry of m_memory_control_profilers -- TODO confirm.
184 void profileMemReq(string name, int bank);
185 void profileMemBankBusy(string name);
186 void profileMemBusBusy(string name);
187 void profileMemTfawBusy(string name);
188 void profileMemReadWriteBusy(string name);
189 void profileMemDataBusBusy(string name);
190 void profileMemRefresh(string name);
191 void profileMemRead(string name);
192 void profileMemWrite(string name);
193 void profileMemWaitCycles(string name, int cycles);
194 void profileMemInputQ(string name, int cycles);
195 void profileMemBankQ(string name, int cycles);
196 void profileMemArbWait(string name, int cycles);
197 void profileMemRandBusy(string name);
198 void profileMemNotOld(string name);
// Flag accessors. m_hot_lines is not declared in the visible part of this
// listing; m_all_instructions is declared at the end of the class.
200 bool getHotLines() { return m_hot_lines; }
201 bool getAllInstructions() { return m_all_instructions; }
// Presumably the names passed to init() -- TODO confirm.
205 vector<string> m_memory_control_names;
207 // Private copy constructor and assignment operator
// Declared without a visible definition, the usual pre-C++11 way to make a
// class non-copyable; the "private:" label itself is not in this listing.
208 Profiler(const Profiler& obj);
209 Profiler& operator=(const Profiler& obj);
211 // Data Members (m_ prefix)
// Sub-profilers, held by raw pointer.
212 CacheProfiler* m_L1D_cache_profiler_ptr;
213 CacheProfiler* m_L1I_cache_profiler_ptr;
214 CacheProfiler* m_L2_cache_profiler_ptr;
215 AddressProfiler* m_address_profiler_ptr;
216 AddressProfiler* m_inst_profiler_ptr;
// Baseline values, one entry per vector slot (presumably per processor).
218 Vector<int64> m_instructions_executed_at_start;
219 Vector<int64> m_cycles_executed_at_start;
// Periodic-statistics state (see setPeriodicStatsFile / Interval above).
221 ostream* m_periodic_output_file_ptr;
222 integer_t m_stats_period;
// Wall-clock start time as a time_t.
225 time_t m_real_time_start_time;
// Counters incremented by countBAUnicast() / countBABroadcast().
227 int m_num_BA_unicasts;
228 int m_num_BA_broadcasts;
// Per-processor counters, going by the m_perProc prefix.
230 Vector<integer_t> m_perProcTotalMisses;
231 Vector<integer_t> m_perProcUserMisses;
232 Vector<integer_t> m_perProcSupervisorMisses;
233 Vector<integer_t> m_perProcStartTransaction;
234 Vector<integer_t> m_perProcEndTransaction;
// Two-level table of busy counts; presumably indexed by machine type and
// then machine number, matching controllerBusy(MachineID) -- TODO confirm.
235 Vector < Vector < integer_t > > m_busyControllerCount;
236 integer_t m_busyBankCount;
237 Histogram m_multicast_retry_histogram;
// Fed by L1tbeUsageSample / L2tbeUsageSample / stopTableUsageSample.
239 Histogram m_L1tbeProfile;
240 Histogram m_L2tbeProfile;
241 Histogram m_stopTableProfile;
243 Histogram m_filter_action_histogram;
244 Histogram m_tbeProfile;
// Fed by sequencerRequests() and storeBuffer().
246 Histogram m_sequencer_requests;
247 Histogram m_store_buffer_size;
248 Histogram m_store_buffer_blocks;
// Sharing statistics (see profileSharing).
249 Histogram m_read_sharing_histogram;
250 Histogram m_write_sharing_histogram;
251 Histogram m_all_sharing_histogram;
252 int64 m_cache_to_cache;
253 int64 m_memory_to_cache;
255 Histogram m_prefetchWaitHistogram;
// Miss-latency histograms; the Vector variants hold one histogram per slot
// (presumably per request type / per responding machine type).
257 Vector<Histogram> m_missLatencyHistograms;
258 Vector<Histogram> m_machLatencyHistograms;
259 Histogram m_L2MissLatencyHistogram;
260 Histogram m_allMissLatencyHistogram;
// Software-prefetch latency histograms, parallel to the miss-latency set.
262 Histogram m_allSWPrefetchLatencyHistogram;
263 Histogram m_SWPrefetchL2MissLatencyHistogram;
264 Vector<Histogram> m_SWPrefetchLatencyHistograms;
265 Vector<Histogram> m_SWPrefetchMachLatencyHistograms;
// Message-delay histograms; the VC vector presumably has one entry per
// virtual network (see profileMsgDelay) -- TODO confirm.
267 Histogram m_delayedCyclesHistogram;
268 Histogram m_delayedCyclesNonPFHistogram;
269 Vector<Histogram> m_delayedCyclesVCHistograms;
// Prediction counters (see recordPrediction).
272 int m_predictionOpportunities;
273 int m_goodPredictions;
// Mask-prediction histograms (see profileGet*MaskPrediction and
// profileTrainingMask).
275 Histogram m_gets_mask_prediction;
276 Histogram m_getx_mask_prediction;
277 Histogram m_explicit_training_mask;
279 // For profiling possibly conflicting requests
280 Map<Address, Time>* m_conflicting_map_ptr;
281 Histogram m_conflicting_histogram;
// Fed by profileOutstandingRequest / profileOutstandingPersistentRequest.
283 Histogram m_outstanding_requests;
284 Histogram m_outstanding_persistent_requests;
// Fed by profileAverageLatencyEstimate.
286 Histogram m_average_latency_estimate;
// Addresses being watched (see rubyWatch / watchAddress).
288 Map<Address, int>* m_watch_address_list_ptr;
289 // counts all initiated cache requests including PUTs
291 Map <string, int>* m_requestProfileMap_ptr;
293 // added for MemoryControl:
// Maps a string key (presumably the memory controller name) to its counter
// record; the pointers are raw, so lifetime is managed by the implementation.
295 map< string, memory_control_profiler* > m_memory_control_profilers;
// Returned by getAllInstructions().
299 bool m_all_instructions;
304 // Output operator declaration
// Stream-insertion operator for Profiler. It takes the Profiler by const
// reference and returns an ostream reference, so insertions can be chained.
// The definition follows later in this header.
305 ostream& operator<<(ostream& out, const Profiler& obj);
307 // ******************* Definitions *******************
309 // Output operator definition
311 ostream& operator<<(ostream& out, const Profiler& obj)