1 # Copyright (c) 2003-2004 The Regents of The University of Michigan
4 # Redistribution and use in source and binary forms, with or without
5 # modification, are permitted provided that the following conditions are
6 # met: redistributions of source code must retain the above copyright
7 # notice, this list of conditions and the following disclaimer;
8 # redistributions in binary form must reproduce the above copyright
9 # notice, this list of conditions and the following disclaimer in the
10 # documentation and/or other materials provided with the distribution;
11 # neither the name of the copyright holders nor the names of its
12 # contributors may be used to endorse or promote products derived from
13 # this software without specific prior written permission.
15 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
19 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
20 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
21 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 def __init__(self
, options
):
31 self
.name
= options
.db
32 self
.host
= options
.host
33 self
.user
= options
.user
34 self
.passwd
= options
.passwd
40 self
.mydb
= MySQLdb
.connect(db
='mysql', host
=self
.host
, user
=self
.user
,
42 self
.cursor
= self
.mydb
.cursor()
46 self
.mydb
= MySQLdb
.connect(db
=self
.name
, host
=self
.host
,
47 user
=self
.user
, passwd
=self
.passwd
)
48 self
.cursor
= self
.mydb
.cursor()
51 if self
.mydb
is not None:
56 self
.cursor
.execute(sql
)
59 self
.query('DROP DATABASE IF EXISTS %s' % self
.name
)
62 self
.query('CREATE DATABASE %s' % self
.name
)
66 # Each run (or simulation) gets its own entry in the runs table to
67 # group stats by where they were generated
70 # 'id' is a unique identifier for each run to be used in other
72 # 'name' is the user designated name for the data generated. It is
73 # configured in the simulator.
74 # 'user' identifies the user that generated the data for the given
76 # 'project' another name to identify runs for a specific goal
77 # 'date' is a timestamp for when the data was generated. It can be
78 # used to easily expire data that was generated in the past.
79 # 'expire' is a timestamp for when the data should be removed from
80 # the database so we don't have years worth of junk.
83 # 'run' is indexed so you can find out details of a run if the run
84 # was retreived from the data table.
85 # 'name' is indexed so that two all run names are forced to be unique
89 rn_id SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT,
90 rn_name VARCHAR(200) NOT NULL,
91 rn_sample VARCHAR(32) NOT NULL,
92 rn_user VARCHAR(32) NOT NULL,
93 rn_project VARCHAR(100) NOT NULL,
94 rn_date TIMESTAMP NOT NULL,
95 rn_expire TIMESTAMP NOT NULL,
97 UNIQUE (rn_name,rn_sample)
101 # We keep the bin names separate so that the data table doesn't get
102 # huge since bin names are frequently repeated.
105 # 'id' is the unique bin identifer.
106 # 'name' is the string name for the bin.
109 # 'bin' is indexed to get the name of a bin when data is retrieved
110 # via the data table.
111 # 'name' is indexed to get the bin id for a named bin when you want
112 # to search the data table based on a specific bin.
116 bn_id SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT,
117 bn_name VARCHAR(255) NOT NULL,
123 # The stat table gives us all of the data for a particular stat.
126 # 'stat' is a unique identifier for each stat to be used in other
127 # tables for references.
128 # 'name' is simply the simulator derived name for a given
130 # 'descr' is the description of the statistic and what it tells
132 # 'type' defines what the stat tells you. Types are:
133 # SCALAR: A simple scalar statistic that holds one value
134 # VECTOR: An array of statistic values. Such a something that
135 # is generated per-thread. Vectors exist to give averages,
136 # pdfs, cdfs, means, standard deviations, etc across the
138 # DIST: Is a distribution of data. When the statistic value is
139 # sampled, its value is counted in a particular bucket.
140 # Useful for keeping track of utilization of a resource.
141 # (e.g. fraction of time it is 25% used vs. 50% vs. 100%)
142 # VECTORDIST: Can be used when the distribution needs to be
143 # factored out into a per-thread distribution of data for
144 # example. It can still be summed across threads to find
145 # the total distribution.
146 # VECTOR2D: Can be used when you have a stat that is not only
147 # per-thread, but it is per-something else. Like
149 # FORMULA: This statistic is a formula, and its data must be
150 # looked up in the formula table, for indicating how to
151 # present its values.
152 # 'subdata' is potentially used by any of the vector types to
153 # give a specific name to all of the data elements within a
155 # 'print' indicates whether this stat should be printed ever.
156 # (Unnamed stats don't usually get printed)
157 # 'prereq' only print the stat if the prereq is not zero.
158 # 'prec' number of decimal places to print
159 # 'nozero' don't print zero values
160 # 'nonan' don't print NaN values
161 # 'total' for vector type stats, print the total.
162 # 'pdf' for vector type stats, print the pdf.
163 # 'cdf' for vector type stats, print the cdf.
165 # The Following are for dist type stats:
166 # 'min' is the minimum bucket value. Anything less is an underflow.
167 # 'max' is the maximum bucket value. Anything more is an overflow.
168 # 'bktsize' is the approximate number of entries in each bucket.
169 # 'size' is the number of buckets. equal to (min/max)/bktsize.
172 # 'stat' is indexed so that you can find out details about a stat
173 # if the stat id was retrieved from the data table.
174 # 'name' is indexed so that you can simply look up data about a
179 st_id SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT,
180 st_name VARCHAR(255) NOT NULL,
181 st_descr TEXT NOT NULL,
182 st_type ENUM("SCALAR", "VECTOR", "DIST", "VECTORDIST",
183 "VECTOR2D", "FORMULA") NOT NULL,
184 st_print BOOL NOT NULL,
185 st_prereq SMALLINT UNSIGNED NOT NULL,
186 st_prec TINYINT NOT NULL,
187 st_nozero BOOL NOT NULL,
188 st_nonan BOOL NOT NULL,
189 st_total BOOL NOT NULL,
190 st_pdf BOOL NOT NULL,
191 st_cdf BOOL NOT NULL,
192 st_min DOUBLE NOT NULL,
193 st_max DOUBLE NOT NULL,
194 st_bktsize DOUBLE NOT NULL,
195 st_size SMALLINT UNSIGNED NOT NULL,
201 # This is the main table of data for stats.
204 # 'stat' refers to the stat field given in the stat table.
206 # 'x' referrs to the first dimension of a multi-dimensional stat. For
207 # a vector, x will start at 0 and increase for each vector
209 # For a distribution:
210 # -1: sum (for calculating standard deviation)
211 # -2: sum of squares (for calculating standard deviation)
212 # -3: total number of samples taken (for calculating
213 # standard deviation)
218 # 'y' is used by a VECTORDIST and the VECTOR2D to describe the second
220 # 'run' is the run that the data was generated from. Details up in
222 # 'tick' is a timestamp generated by the simulator.
223 # 'bin' is the name of the bin that the data was generated in, if
225 # 'data' is the actual stat value.
228 # 'stat' is indexed so that a user can find all of the data for a
229 # particular stat. It is not unique, because that specific stat
230 # can be found in many runs, bins, and samples, in addition to
231 # having entries for the mulidimensional cases.
232 # 'run' is indexed to allow a user to remove all of the data for a
233 # particular execution run. It can also be used to allow the
234 # user to print out all of the data for a given run.
238 dt_stat SMALLINT UNSIGNED NOT NULL,
239 dt_x SMALLINT NOT NULL,
240 dt_y SMALLINT NOT NULL,
241 dt_run SMALLINT UNSIGNED NOT NULL,
242 dt_tick BIGINT UNSIGNED NOT NULL,
243 dt_bin SMALLINT UNSIGNED NOT NULL,
244 dt_data DOUBLE NOT NULL,
247 UNIQUE (dt_stat,dt_x,dt_y,dt_run,dt_tick,dt_bin)
251 # Names and descriptions for multi-dimensional stats (vectors, etc.)
252 # are stored here instead of having their own entry in the statistics
253 # table. This allows all parts of a single stat to easily share a
257 # 'stat' is the unique stat identifier from the stat table.
258 # 'x' is the first dimension for multi-dimensional stats
259 # corresponding to the data table above.
260 # 'y' is the second dimension for multi-dimensional stats
261 # corresponding to the data table above.
262 # 'name' is the specific subname for the unique stat,x,y combination.
263 # 'descr' is the specific description for the uniqe stat,x,y
267 # 'stat' is indexed so you can get the subdata for a specific stat.
270 CREATE TABLE subdata(
271 sd_stat SMALLINT UNSIGNED NOT NULL,
272 sd_x SMALLINT NOT NULL,
273 sd_y SMALLINT NOT NULL,
274 sd_name VARCHAR(255) NOT NULL,
276 UNIQUE (sd_stat,sd_x,sd_y)
281 # The formula table is maintained separately from the data table
282 # because formula data, unlike other stat data cannot be represented
286 # 'stat' refers to the stat field generated in the stat table.
287 # 'formula' is the actual string representation of the formula
291 # 'stat' is indexed so that you can just look up a formula.
294 CREATE TABLE formulas(
295 fm_stat SMALLINT UNSIGNED NOT NULL,
296 fm_formula BLOB NOT NULL,
301 # Each stat used in each formula is kept in this table. This way, if
302 # you want to print out a particular formula, you can simply find out
303 # which stats you need by looking in this table. Additionally, when
304 # you remove a stat from the stats table and data table, you remove
305 # any references to the formula in this table. When a formula is no
306 # longer referred to, you remove its entry.
309 # 'stat' is the stat id from the stat table above.
310 # 'child' is the stat id of a stat that is used for this formula.
311 # There may be many children for any given 'stat' (formula)
314 # 'stat' is indexed so you can look up all of the children for a
316 # 'child' is indexed so that you can remove an entry when a stat is
320 CREATE TABLE formula_ref(
321 fr_stat SMALLINT UNSIGNED NOT NULL,
322 fr_run SMALLINT UNSIGNED NOT NULL,
323 UNIQUE (fr_stat,fr_run),
329 # 'event' is the unique event id from the event_desc table
330 # 'run' is simulation run id that this event took place in
331 # 'tick' is the tick when the event happened
334 # 'event' is indexed so you can look up all occurences of a
336 # 'run' is indexed so you can find all events in a run
337 # 'tick' is indexed because we want the unique thing anyway
338 # 'event,run,tick' is unique combination
341 ev_event SMALLINT UNSIGNED NOT NULL,
342 ev_run SMALLINT UNSIGNED NOT NULL,
343 ev_tick BIGINT UNSIGNED NOT NULL,
347 UNIQUE(ev_event,ev_run,ev_tick)
351 # 'id' is the unique description id
352 # 'name' is the name of the event that occurred
355 # 'id' is indexed because it is the primary key and is what you use
356 # to look up the descriptions
357 # 'name' is indexed so one can find the event based on name
360 CREATE TABLE event_names(
361 en_id SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT,
362 en_name VARCHAR(255) NOT NULL,
371 LEFT JOIN runs ON dt_run=rn_id
372 WHERE rn_id IS NULL''')
377 LEFT JOIN runs ON fr_run=rn_id
378 WHERE rn_id IS NULL''')
383 LEFT JOIN formula_ref ON fm_stat=fr_stat
384 WHERE fr_stat IS NULL''')
389 LEFT JOIN data ON st_id=dt_stat
390 WHERE dt_stat IS NULL''')
395 LEFT JOIN data ON sd_stat=dt_stat
396 WHERE dt_stat IS NULL''')
401 LEFT JOIN data ON bn_id=dt_bin
402 WHERE dt_bin IS NULL''')
407 LEFT JOIN runs ON ev_run=rn_id
408 WHERE rn_id IS NULL''')
413 LEFT JOIN events ON en_id=ev_event
414 WHERE ev_event IS NULL''')