Merge zizzer:/bk/m5
[gem5.git] / util / stats / db.py
1 # Copyright (c) 2003-2004 The Regents of The University of Michigan
2 # All rights reserved.
3 #
4 # Redistribution and use in source and binary forms, with or without
5 # modification, are permitted provided that the following conditions are
6 # met: redistributions of source code must retain the above copyright
7 # notice, this list of conditions and the following disclaimer;
8 # redistributions in binary form must reproduce the above copyright
9 # notice, this list of conditions and the following disclaimer in the
10 # documentation and/or other materials provided with the distribution;
11 # neither the name of the copyright holders nor the names of its
12 # contributors may be used to endorse or promote products derived from
13 # this software without specific prior written permission.
14 #
15 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
19 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
20 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
21 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
27 import MySQLdb, re, string
28
29 def statcmp(a, b):
30 v1 = a.split('.')
31 v2 = b.split('.')
32
33 last = min(len(v1), len(v2)) - 1
34 for i,j in zip(v1[0:last], v2[0:last]):
35 if i != j:
36 return cmp(i, j)
37
38 # Special compare for last element.
39 if len(v1) == len(v2):
40 return cmp(v1[last], v2[last])
41 else:
42 return cmp(len(v1), len(v2))
43
44 class RunData:
45 def __init__(self, row):
46 self.run = int(row[0])
47 self.name = row[1]
48 self.user = row[2]
49 self.project = row[3]
50
51 class SubData:
52 def __init__(self, row):
53 self.stat = int(row[0])
54 self.x = int(row[1])
55 self.y = int(row[2])
56 self.name = row[3]
57 self.descr = row[4]
58
59 class Data:
60 def __init__(self, row):
61 if len(row) != 5:
62 raise 'stat db error'
63 self.stat = int(row[0])
64 self.run = int(row[1])
65 self.x = int(row[2])
66 self.y = int(row[3])
67 self.data = float(row[4])
68
69 def __repr__(self):
70 return '''Data(['%d', '%d', '%d', '%d', '%f'])''' % ( self.stat,
71 self.run, self.x, self.y, self.data)
72
73 class StatData(object):
74 def __init__(self, row):
75 self.stat = int(row[0])
76 self.name = row[1]
77 self.desc = row[2]
78 self.type = row[3]
79 self.prereq = int(row[5])
80 self.precision = int(row[6])
81
82 import flags
83 self.flags = 0
84 if int(row[4]): self.flags |= flags.printable
85 if int(row[7]): self.flags |= flags.nozero
86 if int(row[8]): self.flags |= flags.nonan
87 if int(row[9]): self.flags |= flags.total
88 if int(row[10]): self.flags |= flags.pdf
89 if int(row[11]): self.flags |= flags.cdf
90
91 if self.type == 'DIST' or self.type == 'VECTORDIST':
92 self.min = float(row[12])
93 self.max = float(row[13])
94 self.bktsize = float(row[14])
95 self.size = int(row[15])
96
97 if self.type == 'FORMULA':
98 self.formula = self.db.allFormulas[self.stat]
99
100 class Node(object):
101 def __init__(self, name):
102 self.name = name
103 def __str__(self):
104 return self.name
105
106 class Result(object):
107 def __init__(self, x, y):
108 self.data = {}
109 self.x = x
110 self.y = y
111
112 def __contains__(self, run):
113 return run in self.data
114
115 def __getitem__(self, run):
116 if run not in self.data:
117 self.data[run] = [ [ 0.0 ] * self.y for i in xrange(self.x) ]
118 return self.data[run]
119
120 class Database(object):
121 def __init__(self):
122 self.host = 'zizzer.pool'
123 self.user = ''
124 self.passwd = ''
125 self.db = 'm5stats'
126 self.cursor = None
127
128 self.allStats = []
129 self.allStatIds = {}
130 self.allStatNames = {}
131
132 self.allSubData = {}
133
134 self.allRuns = []
135 self.allRunIds = {}
136 self.allRunNames = {}
137
138 self.allBins = []
139 self.allBinIds = {}
140 self.allBinNames = {}
141
142 self.allFormulas = {}
143
144 self.stattop = {}
145 self.statdict = {}
146 self.statlist = []
147
148 self.mode = 'sum';
149 self.runs = None
150 self.bins = None
151 self.ticks = None
152 self.method = 'sum'
153 self._method = type(self).sum
154
155 def get(self, job, stat, system=None):
156 run = self.allRunNames.get(str(job), None)
157 if run is None:
158 return None
159
160 from info import ProxyError, scalar, vector, value, values, total, len
161 if system is None and hasattr(job, 'system'):
162 system = job.system
163
164 if system is not None:
165 stat.system = self[system]
166 try:
167 if scalar(stat):
168 return value(stat, run.run)
169 if vector(stat):
170 return values(stat, run.run)
171 except ProxyError:
172 return None
173
174 return None
175
176 def query(self, sql):
177 self.cursor.execute(sql)
178
179 def update_dict(self, dict):
180 dict.update(self.stattop)
181
182 def append(self, stat):
183 statname = re.sub(':', '__', stat.name)
184 path = string.split(statname, '.')
185 pathtop = path[0]
186 fullname = ''
187
188 x = self
189 while len(path) > 1:
190 name = path.pop(0)
191 if not x.__dict__.has_key(name):
192 x.__dict__[name] = Node(fullname + name)
193 x = x.__dict__[name]
194 fullname = '%s%s.' % (fullname, name)
195
196 name = path.pop(0)
197 x.__dict__[name] = stat
198
199 self.stattop[pathtop] = self.__dict__[pathtop]
200 self.statdict[statname] = stat
201 self.statlist.append(statname)
202
203 def connect(self):
204 # connect
205 self.thedb = MySQLdb.connect(db=self.db,
206 host=self.host,
207 user=self.user,
208 passwd=self.passwd)
209
210 # create a cursor
211 self.cursor = self.thedb.cursor()
212
213 self.query('''select rn_id,rn_name,rn_sample,rn_user,rn_project
214 from runs''')
215 for result in self.cursor.fetchall():
216 run = RunData(result);
217 self.allRuns.append(run)
218 self.allRunIds[run.run] = run
219 self.allRunNames[run.name] = run
220
221 self.query('select * from bins')
222 for id,name in self.cursor.fetchall():
223 self.allBinIds[int(id)] = name
224 self.allBinNames[name] = int(id)
225
226 self.query('select sd_stat,sd_x,sd_y,sd_name,sd_descr from subdata')
227 for result in self.cursor.fetchall():
228 subdata = SubData(result)
229 if self.allSubData.has_key(subdata.stat):
230 self.allSubData[subdata.stat].append(subdata)
231 else:
232 self.allSubData[subdata.stat] = [ subdata ]
233
234 self.query('select * from formulas')
235 for id,formula in self.cursor.fetchall():
236 self.allFormulas[int(id)] = formula.tostring()
237
238 StatData.db = self
239 self.query('select * from stats')
240 import info
241 for result in self.cursor.fetchall():
242 stat = info.NewStat(self, StatData(result))
243 self.append(stat)
244 self.allStats.append(stat)
245 self.allStatIds[stat.stat] = stat
246 self.allStatNames[stat.name] = stat
247
248 # Name: listbins
249 # Desc: Prints all bins matching regex argument, if no argument
250 # is given all bins are returned
251 def listBins(self, regex='.*'):
252 print '%-50s %-10s' % ('bin name', 'id')
253 print '-' * 61
254 names = self.allBinNames.keys()
255 names.sort()
256 for name in names:
257 id = self.allBinNames[name]
258 print '%-50s %-10d' % (name, id)
259
260 # Name: listruns
261 # Desc: Prints all runs matching a given user, if no argument
262 # is given all runs are returned
263 def listRuns(self, user=None):
264 print '%-40s %-10s %-5s' % ('run name', 'user', 'id')
265 print '-' * 62
266 for run in self.allRuns:
267 if user == None or user == run.user:
268 print '%-40s %-10s %-10d' % (run.name, run.user, run.run)
269
270 # Name: listTicks
271 # Desc: Prints all samples for a given run
272 def listTicks(self, runs=None):
273 print "tick"
274 print "----------------------------------------"
275 sql = 'select distinct dt_tick from data where dt_stat=1180 and ('
276 if runs != None:
277 first = True
278 for run in runs:
279 if first:
280 # sql += ' where'
281 first = False
282 else:
283 sql += ' or'
284 sql += ' dt_run=%s' % run.run
285 sql += ')'
286 self.query(sql)
287 for r in self.cursor.fetchall():
288 print r[0]
289
290 # Name: retTicks
291 # Desc: Prints all samples for a given run
292 def retTicks(self, runs=None):
293 sql = 'select distinct dt_tick from data where dt_stat=1180 and ('
294 if runs != None:
295 first = True
296 for run in runs:
297 if first:
298 first = False
299 else:
300 sql += ' or'
301 sql += ' dt_run=%s' % run.run
302 sql += ')'
303 self.query(sql)
304 ret = []
305 for r in self.cursor.fetchall():
306 ret.append(r[0])
307 return ret
308
309 # Name: liststats
310 # Desc: Prints all statistics that appear in the database,
311 # the optional argument is a regular expression that can
312 # be used to prune the result set
313 def listStats(self, regex=None):
314 print '%-60s %-8s %-10s' % ('stat name', 'id', 'type')
315 print '-' * 80
316
317 rx = None
318 if regex != None:
319 rx = re.compile(regex)
320
321 stats = [ stat.name for stat in self.allStats ]
322 stats.sort(statcmp)
323 for stat in stats:
324 stat = self.allStatNames[stat]
325 if rx == None or rx.match(stat.name):
326 print '%-60s %-8s %-10s' % (stat.name, stat.stat, stat.type)
327
328 # Name: liststats
329 # Desc: Prints all statistics that appear in the database,
330 # the optional argument is a regular expression that can
331 # be used to prune the result set
332 def listFormulas(self, regex=None):
333 print '%-60s %s' % ('formula name', 'formula')
334 print '-' * 80
335
336 rx = None
337 if regex != None:
338 rx = re.compile(regex)
339
340 stats = [ stat.name for stat in self.allStats ]
341 stats.sort(statcmp)
342 for stat in stats:
343 stat = self.allStatNames[stat]
344 if stat.type == 'FORMULA' and (rx == None or rx.match(stat.name)):
345 print '%-60s %s' % (stat.name, self.allFormulas[stat.stat])
346
347 def getStat(self, stats):
348 if type(stats) is not list:
349 stats = [ stats ]
350
351 ret = []
352 for stat in stats:
353 if type(stat) is int:
354 ret.append(self.allStatIds[stat])
355
356 if type(stat) is str:
357 rx = re.compile(stat)
358 for stat in self.allStats:
359 if rx.match(stat.name):
360 ret.append(stat)
361 return ret
362
363 def getBin(self, bins):
364 if type(bins) is not list:
365 bins = [ bins ]
366
367 ret = []
368 for bin in bins:
369 if type(bin) is int:
370 ret.append(bin)
371 elif type(bin) is str:
372 ret.append(self.allBinNames[bin])
373 else:
374 for name,id in self.allBinNames.items():
375 if bin.match(name):
376 ret.append(id)
377
378 return ret
379
380 def getNotBin(self, bin):
381 map = {}
382 for bin in getBin(bin):
383 map[bin] = 1
384
385 ret = []
386 for bin in self.allBinIds.keys():
387 if not map.has_key(bin):
388 ret.append(bin)
389
390 return ret
391
392 #########################################
393 # get the data
394 #
395 def inner(self, op, stat, bins, ticks, group=False):
396 sql = 'select '
397 sql += 'dt_stat as stat, '
398 sql += 'dt_run as run, '
399 sql += 'dt_x as x, '
400 sql += 'dt_y as y, '
401 if group:
402 sql += 'dt_tick as tick, '
403 sql += '%s(dt_data) as data ' % op
404 sql += 'from data '
405 sql += 'where '
406
407 if isinstance(stat, list):
408 val = ' or '.join([ 'dt_stat=%d' % s.stat for s in stat ])
409 sql += ' (%s)' % val
410 else:
411 sql += ' dt_stat=%d' % stat.stat
412
413 if self.runs != None and len(self.runs):
414 val = ' or '.join([ 'dt_run=%d' % r for r in self.runs ])
415 sql += ' and (%s)' % val
416
417 if bins != None and len(bins):
418 val = ' or '.join([ 'dt_bin=%d' % b for b in bins ])
419 sql += ' and (%s)' % val
420
421 if ticks != None and len(ticks):
422 val = ' or '.join([ 'dt_tick=%d' % s for s in ticks ])
423 sql += ' and (%s)' % val
424
425 sql += ' group by dt_stat,dt_run,dt_x,dt_y'
426 if group:
427 sql += ',dt_tick'
428 return sql
429
430 def outer(self, op_out, op_in, stat, bins, ticks):
431 sql = self.inner(op_in, stat, bins, ticks, True)
432 sql = 'select stat,run,x,y,%s(data) from (%s) as tb ' % (op_out, sql)
433 sql += 'group by stat,run,x,y'
434 return sql
435
436 # Name: sum
437 # Desc: given a run, a stat and an array of samples and bins,
438 # sum all the bins and then get the standard deviation of the
439 # samples for non-binned runs. This will just return the average
440 # of samples, however a bin array still must be passed
441 def sum(self, stat, bins, ticks):
442 return self.inner('sum', stat, bins, ticks)
443
444 # Name: avg
445 # Desc: given a run, a stat and an array of samples and bins,
446 # sum all the bins and then average the samples for non-binned
447 # runs this will just return the average of samples, however
448 # a bin array still must be passed
449 def avg(self, stat, bins, ticks):
450 return self.outer('avg', 'sum', stat, bins, ticks)
451
452 # Name: stdev
453 # Desc: given a run, a stat and an array of samples and bins,
454 # sum all the bins and then get the standard deviation of the
455 # samples for non-binned runs. This will just return the average
456 # of samples, however a bin array still must be passed
457 def stdev(self, stat, bins, ticks):
458 return self.outer('stddev', 'sum', stat, bins, ticks)
459
460 def __setattr__(self, attr, value):
461 super(Database, self).__setattr__(attr, value)
462 if attr != 'method':
463 return
464
465 if value == 'sum':
466 self._method = self.sum
467 elif value == 'avg':
468 self._method = self.avg
469 elif value == 'stdev':
470 self._method = self.stdev
471 else:
472 raise AttributeError, "can only set get to: sum | avg | stdev"
473
474 def data(self, stat, bins=None, ticks=None):
475 if bins is None:
476 bins = self.bins
477 if ticks is None:
478 ticks = self.ticks
479 sql = self._method(self, stat, bins, ticks)
480 self.query(sql)
481
482 runs = {}
483 xmax = 0
484 ymax = 0
485 for x in self.cursor.fetchall():
486 data = Data(x)
487 if not runs.has_key(data.run):
488 runs[data.run] = {}
489 if not runs[data.run].has_key(data.x):
490 runs[data.run][data.x] = {}
491
492 xmax = max(xmax, data.x)
493 ymax = max(ymax, data.y)
494 runs[data.run][data.x][data.y] = data.data
495
496 results = Result(xmax + 1, ymax + 1)
497 for run,data in runs.iteritems():
498 result = results[run]
499 for x,ydata in data.iteritems():
500 for y,data in ydata.iteritems():
501 result[x][y] = data
502 return results
503
504 def __getitem__(self, key):
505 return self.stattop[key]