Totally re-do/reorganize the python part of the statistics code
author Nathan Binkert <binkertn@umich.edu>
Tue, 10 Aug 2004 01:20:52 +0000 (21:20 -0400)
committer Nathan Binkert <binkertn@umich.edu>
Tue, 10 Aug 2004 01:20:52 +0000 (21:20 -0400)
Make the database creation/removal/cleanup code use python
Make formulas work with the database
Add support to do some graphing, but needs more work
Still need to work on vectors, 2d vectors, dists and vectordists

--HG--
extra : convert_revision : 1a88320dcc036a3751e8a036770766dce76a568c

util/stats/db.py [new file with mode: 0644]
util/stats/dbinit.py [new file with mode: 0644]
util/stats/display.py [new file with mode: 0644]
util/stats/flags.py [new file with mode: 0644]
util/stats/info.py [new file with mode: 0644]
util/stats/print.py [new file with mode: 0644]
util/stats/stats.py [new file with mode: 0755]

diff --git a/util/stats/db.py b/util/stats/db.py
new file mode 100644 (file)
index 0000000..4cba824
--- /dev/null
@@ -0,0 +1,415 @@
+import MySQLdb, re, string
+
+def statcmp(a, b):
+    v1 = a.split('.')
+    v2 = b.split('.')
+
+    last = min(len(v1), len(v2)) - 1
+    for i,j in zip(v1[0:last], v2[0:last]):
+        if i != j:
+            return cmp(i, j)
+
+    # Special compare for last element.
+    if len(v1) == len(v2):
+        return cmp(v1[last], v2[last])
+    else:
+        return cmp(len(v1), len(v2))
+
+class RunData:
+    def __init__(self, row):
+        self.run = int(row[0])
+        self.name = row[1]
+        self.user = row[2]
+        self.project = row[3]
+
+class SubData:
+    def __init__(self, row):
+        self.stat = int(row[0])
+        self.x = int(row[1])
+        self.y = int(row[2])
+        self.name = row[3]
+        self.descr = row[4]
+
+class Data:
+    def __init__(self, row):
+        if len(row) != 5:
+            raise 'stat db error'
+        self.stat = int(row[0])
+        self.run = int(row[1])
+        self.x = int(row[2])
+        self.y = int(row[3])
+        self.data = float(row[4])
+
+    def __repr__(self):
+        return '''Data(['%d', '%d', '%d', '%d', '%f'])''' % ( self.stat,
+            self.run, self.x, self.y, self.data)
+
+class StatData(object):
+    def __init__(self, row):
+        self.stat = int(row[0])
+        self.name = row[1]
+        self.desc = row[2]
+        self.type = row[3]
+        self.prereq = int(row[5])
+        self.precision = int(row[6])
+
+        import flags
+        self.flags = 0
+        if int(row[4]): self.flags |= flags.printable
+        if int(row[7]): self.flags |= flags.nozero
+        if int(row[8]): self.flags |= flags.nonan
+        if int(row[9]): self.flags |= flags.total
+        if int(row[10]): self.flags |= flags.pdf
+        if int(row[11]): self.flags |= flags.cdf
+
+        if self.type == 'DIST' or self.type == 'VECTORDIST':
+            self.min = float(row[12])
+            self.max = float(row[13])
+            self.bktsize = float(row[14])
+            self.size = int(row[15])
+
+        if self.type == 'FORMULA':
+            self.formula = self.db.allFormulas[self.stat]
+
+class Node(object):
+    def __init__(self, name):
+        self.name = name
+    def __str__(self):
+        return name
+
+class Database(object):
+    def __init__(self):
+        self.host = 'zizzer.pool'
+        self.user = ''
+        self.passwd = ''
+        self.db = 'm5stats'
+        self.cursor = None
+
+        self.allStats = []
+        self.allStatIds = {}
+        self.allStatNames = {}
+
+        self.allSubData = {}
+
+        self.allRuns = []
+        self.allRunIds = {}
+        self.allRunNames = {}
+
+        self.allBins = []
+        self.allBinIds = {}
+        self.allBinNames = {}
+
+        self.allFormulas = {}
+
+        self.stattop = {}
+        self.statdict = {}
+        self.statlist = []
+
+        self.mode = 'sum';
+        self.runs = None
+        self.bins = None
+        self.ticks = None
+        self.__dict__['get'] = type(self).sum
+
+    def query(self, sql):
+        """Execute sql on self.cursor; callers fetch rows from the cursor."""
+        self.cursor.execute(sql)
+
+    def update_dict(self, dict):
+        """Copy every entry of self.stattop into the given dictionary."""
+        dict.update(self.stattop)
+
+    def append(self, stat):
+        """Register stat under its dotted name.
+
+        Every ':' in stat.name is replaced by '__'.  Each leading path
+        component becomes a Node attribute (created on demand), starting on
+        this Database, and the stat itself is stored as the attribute named
+        by the last component.  The stat is also recorded in stattop (under
+        the first component), statdict and statlist (under the converted
+        full name).
+        """
+        statname = re.sub(':', '__', stat.name)
+        path = string.split(statname, '.')
+        pathtop = path[0]
+        fullname = ''
+
+        x = self
+        # Walk down one level per leading component, creating Nodes that
+        # are named by their full dotted prefix.
+        while len(path) > 1:
+            name = path.pop(0)
+            if not x.__dict__.has_key(name):
+                x.__dict__[name] = Node(fullname + name)
+            x = x.__dict__[name]
+            fullname = '%s%s.' % (fullname, name)
+
+        # Only the last component is left; it names the stat itself.
+        name = path.pop(0)
+        x.__dict__[name] = stat
+
+        # For a one-component name this entry is the stat itself, otherwise
+        # it is the top-level Node.
+        self.stattop[pathtop] = self.__dict__[pathtop]
+        self.statdict[statname] = stat
+        self.statlist.append(statname)
+
+    def connect(self):
+        """Open the MySQL connection and load the lookup tables.
+
+        Reads the runs, bins, subdata, formulas and stats tables, in that
+        order, into the all* dictionaries/lists, and registers every stat
+        through append().
+        """
+        # connect
+        self.thedb = MySQLdb.connect(db=self.db,
+                                     host=self.host,
+                                     user=self.user,
+                                     passwd=self.passwd)
+
+        # create a cursor
+        self.cursor = self.thedb.cursor()
+
+        self.query('''select rn_id,rn_name,rn_sample,rn_user,rn_project
+                   from runs''')
+        for result in self.cursor.fetchall():
+            run = RunData(result);
+            self.allRuns.append(run)
+            self.allRunIds[run.run] = run
+            self.allRunNames[run.name] = run
+
+        # Bins are kept as two id<->name maps; self.allBins is not filled
+        # in here.
+        self.query('select * from bins')
+        for id,name in self.cursor.fetchall():
+            self.allBinIds[int(id)] = name
+            self.allBinNames[name] = int(id)
+
+        # Subdata rows are grouped into one list per stat id.
+        self.query('select sd_stat,sd_x,sd_y,sd_name,sd_descr from subdata')
+        for result in self.cursor.fetchall():
+            subdata = SubData(result)
+            if self.allSubData.has_key(subdata.stat):
+                self.allSubData[subdata.stat].append(subdata)
+            else:
+                self.allSubData[subdata.stat] = [ subdata ]
+
+        self.query('select * from formulas')
+        for id,formula in self.cursor.fetchall():
+            self.allFormulas[int(id)] = formula
+
+        # StatData reads formulas through its class attribute 'db', so the
+        # formulas must be loaded and 'db' set before the stats are built.
+        StatData.db = self
+        self.query('select * from stats')
+        import info
+        for result in self.cursor.fetchall():
+            stat = info.NewStat(StatData(result))
+            self.append(stat)
+            self.allStats.append(stat)
+            self.allStatIds[stat.stat] = stat
+            self.allStatNames[stat.name] = stat
+
+    # Name: listbins
+    # Desc: Prints all bins matching regex argument, if no argument
+    #       is given all bins are returned
+    def listBins(self, regex='.*'):
+        print '%-50s %-10s' % ('bin name', 'id')
+        print '-' * 61
+        names = self.allBinNames.keys()
+        names.sort()
+        for name in names:
+            id = self.allBinNames[name]
+            print '%-50s %-10d' % (name, id)
+
+    # Name: listruns
+    # Desc: Prints all runs matching a given user, if no argument
+    #       is given all runs are returned
+    def listRuns(self, user=None):
+        print '%-40s %-10s %-5s' % ('run name', 'user', 'id')
+        print '-' * 62
+        for run in self.allRuns:
+            if user == None or user == run.user:
+                print '%-40s %-10s %-10d' % (run.name, run.user, run.run)
+
+    # Name: listTicks
+    # Desc: Prints all samples for a given run
+    def listTicks(self, run=None):
+        print "tick"
+        print "----------------------------------------"
+        sql = 'select distinct dt_tick from data where dt_stat=1950'
+        #if run != None:
+        #    sql += ' where dt_run=%d' % run
+        self.query(sql)
+        for r in self.cursor.fetchall():
+            print r[0]
+
+    # Name: liststats
+    # Desc: Prints all statistics that appear in the database,
+    #         the optional argument is a regular expression that can
+    #         be used to prune the result set
+    def listStats(self, regex=None):
+        print '%-60s %-8s %-10s' % ('stat name', 'id', 'type')
+        print '-' * 80
+
+        rx = None
+        if regex != None:
+            rx = re.compile(regex)
+
+        stats = [ stat.name for stat in self.allStats ]
+        stats.sort(statcmp)
+        for stat in stats:
+            stat = self.allStatNames[stat]
+            if rx == None or rx.match(stat.name):
+                print '%-60s %-8s %-10s' % (stat.name, stat.stat, stat.type)
+
+    # Name: listFormulas
+    # Desc: Prints the name and formula text of every FORMULA statistic
+    #         in the database, ordered by statcmp; the optional argument
+    #         is a regular expression that can be used to prune the
+    #         result set
+    def listFormulas(self, regex=None):
+        print '%-60s %s' % ('formula name', 'formula')
+        print '-' * 80
+
+        rx = None
+        if regex != None:
+            rx = re.compile(regex)
+
+        stats = [ stat.name for stat in self.allStats ]
+        stats.sort(statcmp)
+        for stat in stats:
+            # Swap the name for the stat object it belongs to.
+            stat = self.allStatNames[stat]
+            if stat.type == 'FORMULA' and (rx == None or rx.match(stat.name)):
+                print '%-60s %s' % (stat.name, self.allFormulas[stat.stat])
+
+    def getStat(self, stats):
+        if type(stats) is not list:
+            stats = [ stats ]
+
+        ret = []
+        for stat in stats:
+            if type(stat) is int:
+                ret.append(self.allStatIds[stat])
+
+            if type(stat) is str:
+                rx = re.compile(stat)
+                for stat in self.allStats:
+                    if rx.match(stat.name):
+                        ret.append(stat)
+        return ret
+
+    def getBin(self, bins):
+        if type(bins) is not list:
+            bins = [ bins ]
+
+        ret = []
+        for bin in bins:
+            if type(bin) is int:
+                ret.append(bin)
+            elif type(bin) is str:
+                ret.append(self.allBinNames[bin])
+            else:
+                for name,id in self.allBinNames.items():
+                    if bin.match(name):
+                        ret.append(id)
+
+        return ret
+
+    def getNotBin(self, bin):
+        map = {}
+        for bin in getBin(bin):
+            map[bin] = 1
+
+        ret = []
+        for bin in self.allBinIds.keys():
+            if not map.has_key(bin):
+                ret.append(bin)
+
+        return ret
+
+    #########################################
+    # get the data
+    #
+    def inner(self, op, stat, bins, ticks, group=False):
+        sql = 'select '
+        sql += 'dt_stat as stat, '
+        sql += 'dt_run as run, '
+        sql += 'dt_x as x, '
+        sql += 'dt_y as y, '
+        if group:
+            sql += 'dt_tick as tick, '
+        sql += '%s(dt_data) as data ' % op
+        sql += 'from data '
+        sql += 'where '
+
+        if isinstance(stat, list):
+            val = ' or '.join([ 'dt_stat=%d' % s.stat for s in stat ])
+            sql += ' (%s)' % val
+        else:
+            sql += ' dt_stat=%d' % stat.stat
+
+        if self.runs != None and len(self.runs):
+            val = ' or '.join([ 'dt_run=%d' % r for r in self.runs ])
+            sql += ' and (%s)' % val
+
+        if bins != None and len(bins):
+            val = ' or '.join([ 'dt_bin=%d' % b for b in bins ])
+            sql += ' and (%s)' % val
+
+        if ticks != None and len(ticks):
+            val = ' or '.join([ 'dt_tick=%d' % s for s in ticks ])
+            sql += ' and (%s)' % val
+
+        sql += ' group by dt_stat,dt_run,dt_x,dt_y'
+        if group:
+            sql += ',dt_tick'
+        return sql
+
+    def outer(self, op_out, op_in, stat, bins, ticks):
+        sql = self.inner(op_in, stat, bins, ticks, True)
+        sql = 'select stat,run,x,y,%s(data) from (%s) as tb ' % (op_out, sql)
+        sql += 'group by stat,run,x,y'
+        return sql
+
+    # Name: sum
+    # Desc: given a stat and arrays of bins and ticks (samples), build
+    #        the SQL query that sums the data over the selected bins and
+    #        ticks for each stat/run/x/y combination. The SQL text is
+    #        returned; nothing is executed here
+    def sum(self, stat, bins, ticks):
+        return self.inner('sum', stat, bins, ticks)
+
+    # Name: avg
+    # Desc: given a stat and arrays of bins and ticks (samples), build
+    #        the SQL query that sums the selected bins for each tick and
+    #        then averages those per-tick sums for each stat/run/x/y
+    #        combination. The SQL text is returned; nothing is executed
+    def avg(self, stat, bins, ticks):
+        return self.outer('avg', 'sum', stat, bins, ticks)
+
+    # Name: stdev
+    # Desc: given a stat and arrays of bins and ticks (samples), build
+    #        the SQL query that sums the selected bins for each tick and
+    #        then takes the standard deviation (SQL stddev) of those
+    #        per-tick sums. The SQL text is returned; nothing is executed
+    def stdev(self, stat, bins, ticks):
+        return self.outer('stddev', 'sum', stat, bins, ticks)
+
+    def __getattribute__(self, attr):
+        if attr != 'get':
+            return super(Database, self).__getattribute__(attr)
+
+        if self.__dict__['get'] == type(self).sum:
+            return 'sum'
+        elif self.__dict__['get'] == type(self).avg:
+            return 'avg'
+        elif self.__dict__['get'] == type(self).stdev:
+            return 'stdev'
+        else:
+            return ''
+
+    def __setattr__(self, attr, value):
+        if attr != 'get':
+            super(Database, self).__setattr__(attr, value)
+            return
+
+        if value == 'sum':
+            self.__dict__['get'] = type(self).sum
+        elif value == 'avg':
+            self.__dict__['get'] = type(self).avg
+        elif value == 'stdev':
+            self.__dict__['get'] = type(self).stdev
+        else:
+            raise AttributeError, "can only set get to: sum | avg | stdev"
+
+    def data(self, stat, bins=None, ticks=None):
+        if bins is None:
+            bins = self.bins
+        if ticks is None:
+            ticks = self.ticks
+        sql = self.__dict__['get'](self, stat, bins, ticks)
+        self.query(sql)
+
+        runs = {}
+        for x in self.cursor.fetchall():
+            data = Data(x)
+            if not runs.has_key(data.run):
+                runs[data.run] = {}
+            if not runs[data.run].has_key(data.x):
+                runs[data.run][data.x] = {}
+
+            runs[data.run][data.x][data.y] = data.data
+        return runs
diff --git a/util/stats/dbinit.py b/util/stats/dbinit.py
new file mode 100644 (file)
index 0000000..686f55c
--- /dev/null
@@ -0,0 +1,388 @@
+import MySQLdb
+
+class MyDB(object):
+    def __init__(self, options):
+        self.name = options.db
+        self.host = options.host
+        self.user = options.user
+        self.passwd = options.passwd
+        self.mydb = None
+        self.cursor = None
+
+    def admin(self):
+        """Reconnect to the server's 'mysql' database and open a cursor.
+
+        Any connection that is currently open is closed first.
+        """
+        self.close()
+        self.mydb = MySQLdb.connect(db='mysql', host=self.host, user=self.user,
+                                    passwd=self.passwd)
+        self.cursor = self.mydb.cursor()
+
+    def connect(self):
+        """Reconnect to the database named self.name and open a cursor.
+
+        Any connection that is currently open is closed first.
+        """
+        self.close()
+        self.mydb = MySQLdb.connect(db=self.name, host=self.host,
+                                    user=self.user, passwd=self.passwd)
+        self.cursor = self.mydb.cursor()
+
+    def close(self):
+        if self.mydb is not None:
+            self.mydb.close()
+        self.cursor = None
+
+    def query(self, sql):
+        """Execute sql on the current cursor."""
+        self.cursor.execute(sql)
+
+    def drop(self):
+        """Drop the database named self.name if it exists."""
+        # The name is pasted into the statement as is, without quoting.
+        self.query('DROP DATABASE IF EXISTS %s' % self.name)
+
+    def create(self):
+        """Create the database named self.name."""
+        # The name is pasted into the statement as is, without quoting.
+        self.query('CREATE DATABASE %s' % self.name)
+
+    def populate(self):
+        """Create the statistics tables.
+
+        Issues one CREATE TABLE statement each for runs, bins, stats, data,
+        subdata, formulas, formula_ref, events and event_names.
+        """
+        #
+        # Each run (or simulation) gets its own entry in the runs table to
+        # group stats by where they were generated
+        #
+        # COLUMNS:
+        #   'id' is a unique identifier for each run to be used in other
+        #       tables.
+        #   'name' is the user designated name for the data generated.  It is
+        #       configured in the simulator.
+        #   'user' identifies the user that generated the data for the given
+        #       run.
+        #   'project' another name to identify runs for a specific goal
+        #   'date' is a timestamp for when the data was generated.  It can be
+        #       used to easily expire data that was generated in the past.
+        #   'expire' is a timestamp for when the data should be removed from
+        #       the database so we don't have years worth of junk.
+        #
+        # INDEXES:
+        #   'run' is indexed so you can find out details of a run if the run
+        #       was retrieved from the data table.
+        #   'name' is indexed (together with 'sample') so that the
+        #       name/sample combination of every run is forced to be unique
+        #
+        self.query('''
+        CREATE TABLE runs(
+            rn_id      SMALLINT UNSIGNED       NOT NULL AUTO_INCREMENT,
+            rn_name    VARCHAR(200)            NOT NULL,
+            rn_sample  VARCHAR(32)             NOT NULL,
+            rn_user    VARCHAR(32)             NOT NULL,
+            rn_project VARCHAR(100)            NOT NULL,
+            rn_date    TIMESTAMP               NOT NULL,
+            rn_expire  TIMESTAMP               NOT NULL,
+            PRIMARY KEY (rn_id),
+            UNIQUE (rn_name,rn_sample)
+        ) TYPE=InnoDB''')
+
+        #
+        # We keep the bin names separate so that the data table doesn't get
+        # huge since bin names are frequently repeated.
+        #
+        # COLUMNS:
+        #   'id' is the unique bin identifier.
+        #   'name' is the string name for the bin.
+        #
+        # INDEXES:
+        #   'bin' is indexed to get the name of a bin when data is retrieved
+        #       via the data table.
+        #   'name' is indexed to get the bin id for a named bin when you want
+        #       to search the data table based on a specific bin.
+        #
+        self.query('''
+        CREATE TABLE bins(
+            bn_id      SMALLINT UNSIGNED       NOT NULL AUTO_INCREMENT,
+            bn_name    VARCHAR(255)            NOT NULL,
+            PRIMARY KEY(bn_id),
+            UNIQUE (bn_name)
+        ) TYPE=InnoDB''')
+
+        #
+        # The stat table gives us all of the data for a particular stat.
+        #
+        # COLUMNS:
+        #   'stat' is a unique identifier for each stat to be used in other
+        #       tables for references.
+        #   'name' is simply the simulator derived name for a given
+        #       statistic.
+        #   'descr' is the description of the statistic and what it tells
+        #       you.
+        #   'type' defines what the stat tells you.  Types are:
+        #       SCALAR: A simple scalar statistic that holds one value
+        #       VECTOR: An array of statistic values.  Such as something that
+        #           is generated per-thread.  Vectors exist to give averages,
+        #           pdfs, cdfs, means, standard deviations, etc across the
+        #           stat values.
+        #       DIST: Is a distribution of data.  When the statistic value is
+        #           sampled, its value is counted in a particular bucket.
+        #           Useful for keeping track of utilization of a resource.
+        #           (e.g. fraction of time it is 25% used vs. 50% vs. 100%)
+        #       VECTORDIST: Can be used when the distribution needs to be
+        #           factored out into a per-thread distribution of data for
+        #           example.  It can still be summed across threads to find
+        #           the total distribution.
+        #       VECTOR2D: Can be used when you have a stat that is not only
+        #           per-thread, but it is per-something else.  Like
+        #           per-message type.
+        #       FORMULA: This statistic is a formula, and its data must be
+        #           looked up in the formula table, for indicating how to
+        #           present its values.
+        #   'subdata' is potentially used by any of the vector types to
+        #       give a specific name to all of the data elements within a
+        #       stat.
+        #   'print' indicates whether this stat should be printed ever.
+        #       (Unnamed stats don't usually get printed)
+        #   'prereq' only print the stat if the prereq is not zero.
+        #   'prec' number of decimal places to print
+        #   'nozero' don't print zero values
+        #   'nonan' don't print NaN values
+        #   'total' for vector type stats, print the total.
+        #   'pdf' for vector type stats, print the pdf.
+        #   'cdf' for vector type stats, print the cdf.
+        #
+        #   The Following are for dist type stats:
+        #   'min' is the minimum bucket value. Anything less is an underflow.
+        #   'max' is the maximum bucket value. Anything more is an overflow.
+        #   'bktsize' is the approximate number of entries in each bucket.
+        #   'size' is the number of buckets. equal to (min/max)/bktsize.
+        #       NOTE(review): presumably (max - min)/bktsize was meant --
+        #       confirm.
+        #
+        # INDEXES:
+        #   'stat' is indexed so that you can find out details about a stat
+        #       if the stat id was retrieved from the data table.
+        #   'name' is indexed so that you can simply look up data about a
+        #       named stat.
+        #
+        self.query('''
+        CREATE TABLE stats(
+            st_id      SMALLINT UNSIGNED       NOT NULL AUTO_INCREMENT,
+            st_name    VARCHAR(255)            NOT NULL,
+            st_descr   TEXT                    NOT NULL,
+            st_type    ENUM("SCALAR", "VECTOR", "DIST", "VECTORDIST",
+                "VECTOR2D", "FORMULA") NOT NULL,
+            st_print   BOOL                    NOT NULL,
+            st_prereq  SMALLINT UNSIGNED       NOT NULL,
+            st_prec    TINYINT                 NOT NULL,
+            st_nozero  BOOL                    NOT NULL,
+            st_nonan   BOOL                    NOT NULL,
+            st_total   BOOL                    NOT NULL,
+            st_pdf     BOOL                    NOT NULL,
+            st_cdf     BOOL                    NOT NULL,
+            st_min     DOUBLE                  NOT NULL,
+            st_max     DOUBLE                  NOT NULL,
+            st_bktsize DOUBLE                  NOT NULL,
+            st_size    SMALLINT UNSIGNED       NOT NULL,
+            PRIMARY KEY (st_id),
+            UNIQUE (st_name)
+        ) TYPE=InnoDB''')
+
+        #
+        # This is the main table of data for stats.
+        #
+        # COLUMNS:
+        #   'stat' refers to the stat field given in the stat table.
+        #
+        #   'x' refers to the first dimension of a multi-dimensional stat. For
+        #       a vector, x will start at 0 and increase for each vector
+        #       element.
+        #       For a distribution:
+        #       -1: sum (for calculating standard deviation)
+        #       -2: sum of squares (for calculating standard deviation)
+        #       -3: total number of samples taken (for calculating
+        #           standard deviation)
+        #       -4: minimum value
+        #       -5: maximum value
+        #       -6: underflow
+        #       -7: overflow
+        #   'y' is used by a VECTORDIST and the VECTOR2D to describe the second
+        #       dimension.
+        #   'run' is the run that the data was generated from.  Details up in
+        #       the run table
+        #   'tick' is a timestamp generated by the simulator.
+        #   'bin' is the name of the bin that the data was generated in, if
+        #       any.
+        #   'data' is the actual stat value.
+        #
+        # INDEXES:
+        #   'stat' is indexed so that a user can find all of the data for a
+        #       particular stat. It is not unique, because that specific stat
+        #       can be found in many runs, bins, and samples, in addition to
+        #       having entries for the multidimensional cases.
+        #   'run' is indexed to allow a user to remove all of the data for a
+        #       particular execution run.  It can also be used to allow the
+        #       user to print out all of the data for a given run.
+        #
+        self.query('''
+        CREATE TABLE data(
+            dt_stat    SMALLINT UNSIGNED       NOT NULL,
+            dt_x       SMALLINT                NOT NULL,
+            dt_y       SMALLINT                NOT NULL,
+            dt_run     SMALLINT UNSIGNED       NOT NULL,
+            dt_tick    BIGINT UNSIGNED         NOT NULL,
+            dt_bin     SMALLINT UNSIGNED       NOT NULL,
+            dt_data    DOUBLE                  NOT NULL,
+            INDEX (dt_stat),
+            INDEX (dt_run),
+            UNIQUE (dt_stat,dt_x,dt_y,dt_run,dt_tick,dt_bin)
+        ) TYPE=InnoDB;''')
+
+        #
+        # Names and descriptions for multi-dimensional stats (vectors, etc.)
+        # are stored here instead of having their own entry in the statistics
+        # table. This allows all parts of a single stat to easily share a
+        # single id.
+        #
+        # COLUMNS:
+        #   'stat' is the unique stat identifier from the stat table.
+        #   'x' is the first dimension for multi-dimensional stats
+        #       corresponding to the data table above.
+        #   'y' is the second dimension for multi-dimensional stats
+        #       corresponding to the data table above.
+        #   'name' is the specific subname for the unique stat,x,y combination.
+        #   'descr' is the specific description for the unique stat,x,y
+        #        combination.
+        #
+        # INDEXES:
+        #   'stat' is indexed so you can get the subdata for a specific stat.
+        #
+        self.query('''
+        CREATE TABLE subdata(
+            sd_stat    SMALLINT UNSIGNED       NOT NULL,
+            sd_x       SMALLINT                NOT NULL,
+            sd_y       SMALLINT                NOT NULL,
+            sd_name    VARCHAR(255)            NOT NULL,
+            sd_descr   TEXT,
+            UNIQUE (sd_stat,sd_x,sd_y)
+        ) TYPE=InnoDB''')
+
+
+        #
+        # The formula table is maintained separately from the data table
+        # because formula data, unlike other stat data cannot be represented
+        # there.
+        #
+        # COLUMNS:
+        #   'stat' refers to the stat field generated in the stat table.
+        #   'formula' is the actual string representation of the formula
+        #       itself.
+        #
+        # INDEXES:
+        #   'stat' is indexed so that you can just look up a formula.
+        #
+        self.query('''
+        CREATE TABLE formulas(
+            fm_stat    SMALLINT UNSIGNED       NOT NULL,
+            fm_formula BLOB                    NOT NULL,
+            PRIMARY KEY(fm_stat)
+        ) TYPE=InnoDB''')
+
+        #
+        # Each formula is tied to the runs it belongs to in this table, one
+        # row per stat/run combination.
+        # NOTE(review): this comment used to describe a 'child' column
+        # holding the stats used by a formula; the table defined below has
+        # no such column, it pairs a stat id with a run id -- confirm the
+        # intended design.
+        #
+        # COLUMNS:
+        #   'stat' is the stat id from the stat table above.
+        #   'run' is the id of a run from the runs table.
+        #
+        # INDEXES:
+        #   'stat' is indexed so you can look up all of the rows for a
+        #       particular stat.
+        #   'run' is indexed so you can look up all of the rows for a
+        #       particular run.
+        #   'stat,run' is a unique combination.
+        #
+        self.query('''
+        CREATE TABLE formula_ref(
+            fr_stat    SMALLINT UNSIGNED       NOT NULL,
+            fr_run     SMALLINT UNSIGNED       NOT NULL,
+            UNIQUE (fr_stat,fr_run),
+            INDEX (fr_stat),
+            INDEX (fr_run)
+        ) TYPE=InnoDB''')
+
+        # COLUMNS:
+        #   'event' is the unique event id from the event_names table
+        #   'run' is simulation run id that this event took place in
+        #   'tick' is the tick when the event happened
+        #
+        # INDEXES:
+        #   'event' is indexed so you can look up all occurrences of a
+        #       specific event
+        #   'run' is indexed so you can find all events in a run
+        #   'tick' is indexed because we want the unique thing anyway
+        #   'event,run,tick' is unique combination
+        self.query('''
+        CREATE TABLE events(
+            ev_event   SMALLINT UNSIGNED       NOT NULL,
+            ev_run     SMALLINT UNSIGNED       NOT NULL,
+            ev_tick    BIGINT   UNSIGNED       NOT NULL,
+            INDEX(ev_event),
+            INDEX(ev_run),
+            INDEX(ev_tick),
+            UNIQUE(ev_event,ev_run,ev_tick)
+        ) TYPE=InnoDB''')
+
+        # COLUMNS:
+        #   'id' is the unique description id
+        #   'name' is the name of the event that occurred
+        #
+        # INDEXES:
+        #   'id' is indexed because it is the primary key and is what you use
+        #       to look up the descriptions
+        #   'name' is indexed so one can find the event based on name
+        #
+        self.query('''
+        CREATE TABLE event_names(
+            en_id      SMALLINT UNSIGNED       NOT NULL AUTO_INCREMENT,
+            en_name    VARCHAR(255)            NOT NULL,
+            PRIMARY KEY (en_id),
+            UNIQUE (en_name)
+        ) TYPE=InnoDB''')
+
+    def clean(self):
+        """Delete rows whose referenced rows no longer exist.
+
+        Each statement left-joins a table to the table it depends on and
+        deletes the rows that found no partner.  The statements run in the
+        order below, so rows orphaned by an earlier delete are seen by the
+        later ones.
+        """
+        # data rows whose run is gone
+        self.query('''
+        DELETE data
+        FROM data
+        LEFT JOIN runs ON dt_run=rn_id
+        WHERE rn_id IS NULL''')
+
+        # formula references whose run is gone
+        self.query('''
+        DELETE formula_ref
+        FROM formula_ref
+        LEFT JOIN runs ON fr_run=rn_id
+        WHERE rn_id IS NULL''')
+
+        # formulas that are no longer referenced
+        self.query('''
+        DELETE formulas
+        FROM formulas
+        LEFT JOIN formula_ref ON fm_stat=fr_stat
+        WHERE fr_stat IS NULL''')
+
+        # stats without any data
+        self.query('''
+        DELETE stats
+        FROM stats
+        LEFT JOIN data ON st_id=dt_stat
+        WHERE dt_stat IS NULL''')
+
+        # subdata of stats without any data
+        self.query('''
+        DELETE subdata
+        FROM subdata
+        LEFT JOIN data ON sd_stat=dt_stat
+        WHERE dt_stat IS NULL''')
+
+        # bins that no data row uses
+        self.query('''
+        DELETE bins
+        FROM bins
+        LEFT JOIN data ON bn_id=dt_bin
+        WHERE dt_bin IS NULL''')
+
+        # events whose run is gone
+        self.query('''
+        DELETE events
+        FROM events
+        LEFT JOIN runs ON ev_run=rn_id
+        WHERE rn_id IS NULL''')
+
+        # event names that no event uses
+        self.query('''
+        DELETE event_names
+        FROM event_names
+        LEFT JOIN events ON en_id=ev_event
+        WHERE ev_event IS NULL''')
diff --git a/util/stats/display.py b/util/stats/display.py
new file mode 100644 (file)
index 0000000..68a2685
--- /dev/null
@@ -0,0 +1,124 @@
+class Value:
+    def __init__(self, value, precision, percent = False):
+        self.value = value
+        self.precision = precision
+        self.percent = percent
+    def __str__(self):
+        if isinstance(self.value, str):
+            if self.value.lower() == 'nan':
+                value = 'NaN'
+            if self.value.lower() == 'inf':
+                value = 'Inf'
+        else:
+            if self.precision >= 0:
+                format = "%%.%df" % self.precision
+            elif self.value == 0.0:
+                format = "%.0f"
+            elif self.value % 1.0 == 0.0:
+                format = "%.0f"
+            else:
+                format = "%f"
+            value = self.value
+            if self.percent:
+                value = value * 100.0
+            value = format % value
+
+        if self.percent:
+            value = value + "%"
+
+        return value
+
+class Print:
+    """One line of statistics output.
+
+    Attributes are supplied as keyword arguments or assigned after
+    construction.  __str__ reads 'name', 'value' and 'precision' and, when
+    present, 'pdf', 'cdf' and 'desc'; doprint reads 'value' and 'flags'.
+    """
+    def __init__(self, **vals):
+        self.__dict__.update(vals)
+
+    def __str__(self):
+        # Layout: name, value, pdf and cdf in fixed-width columns; pdf and
+        # cdf are formatted as percentages with two decimals when set and
+        # left blank otherwise.
+        value = Value(self.value, self.precision)
+        pdf = ''
+        cdf = ''
+        if self.__dict__.has_key('pdf'):
+            pdf = Value(self.pdf, 2, True)
+        if self.__dict__.has_key('cdf'):
+            cdf = Value(self.cdf, 2, True)
+
+        output = "%-40s %12s %8s %8s" % (self.name, value, pdf, cdf)
+
+        # NOTE(review): 'descriptions' is read as a module-level name but no
+        # definition of it is visible in this file -- confirm where it is set.
+        if descriptions and self.__dict__.has_key('desc') and self.desc:
+            output = "%s # %s" % (output, self.desc)
+
+        return output
+
+    def doprint(self):
+        """Return True when this line should be printed."""
+        # NOTE(review): 'display_all', 'flags_nozero' and 'flags_nonan' are
+        # not defined anywhere in this file as shown; as written this method
+        # raises NameError unless they are provided from outside -- confirm.
+        if display_all:
+            return True
+        if self.value == 0.0 and (self.flags & flags_nozero):
+            return False
+        if isinstance(self.value, str):
+            if self.value == 'NaN' and (self.flags & flags_nonan):
+                return False
+        return True
+
+    def display(self):
+        """Print this line to stdout if doprint() allows it."""
+        if self.doprint():
+            print self
+
+class VectorDisplay:
+    """Print a statistic whose 'value' may be a single value or a sequence.
+
+    The caller assigns the attributes before calling display(): 'name',
+    'desc', 'flags', 'precision' and 'value' are always read; 'subnames'
+    and 'subdescs' are used when present.
+    """
+    # NOTE(review): 'issequence', 'display_all', 'flags_pdf' and
+    # 'flags_total' are not defined anywhere in this file as shown --
+    # confirm where they are expected to come from.
+    def display(self):
+        # A single Print object is reused for every line that is emitted.
+        p = Print()
+        p.flags = self.flags
+        p.precision = self.precision
+
+        if issequence(self.value):
+            if not len(self.value):
+                return
+
+            mytotal = reduce(lambda x,y: float(x) + float(y), self.value)
+            mycdf = 0.0
+
+            value = self.value
+
+            # Default sub-names: '[i]' when everything is displayed,
+            # otherwise empty (an empty sub-name suppresses the element).
+            if display_all:
+                subnames = [ '[%d]' % i for i in range(len(value)) ]
+            else:
+                subnames = [''] * len(value)
+
+            # Non-empty entries of self.subnames override the defaults.
+            if self.__dict__.has_key('subnames'):
+                for i,each in enumerate(self.subnames):
+                    if len(each) > 0:
+                        subnames[i] = '.%s' % each
+
+            # Each element inherits the vector's description unless
+            # self.subdescs supplies one at the same index.
+            subdescs = [self.desc]*len(value)
+            if self.__dict__.has_key('subdescs'):
+                for i in xrange(min(len(value), len(self.subdescs))):
+                    subdescs[i] = self.subdescs[i]
+
+            for val,sname,sdesc in map(None, value, subnames, subdescs):
+                # The running cdf is advanced before the empty-name check,
+                # so skipped elements still count towards it.
+                if mytotal > 0.0:
+                    mypdf = float(val) / float(mytotal)
+                    mycdf += mypdf
+                    if (self.flags & flags_pdf):
+                        p.pdf = mypdf
+                        p.cdf = mycdf
+
+                if len(sname) == 0:
+                    continue
+
+                p.name = self.name + sname
+                p.desc = sdesc
+                p.value = val
+                p.display()
+
+            # The '.total' line is printed without pdf/cdf columns.
+            if (self.flags & flags_total):
+                if (p.__dict__.has_key('pdf')): del p.__dict__['pdf']
+                if (p.__dict__.has_key('cdf')): del p.__dict__['cdf']
+                p.name = self.name + '.total'
+                p.desc = self.desc
+                p.value = mytotal
+                p.display()
+
+        else:
+            # Not a sequence: print it as one plain line.
+            p.name = self.name
+            p.desc = self.desc
+            p.value = self.value
+            p.display()
+
diff --git a/util/stats/flags.py b/util/stats/flags.py
new file mode 100644 (file)
index 0000000..7a57e72
--- /dev/null
@@ -0,0 +1,36 @@
+# Copyright (c) 2004 The Regents of The University of Michigan
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# Authors: Nathan Binkert
+
+# Bit masks for a statistic's 'flags' field.  Each name occupies its own
+# bit and is meant to be tested with a bitwise AND, e.g.
+# "self.flags & flags.printable".
+init      = 0x00000001
+printable = 0x00000002
+total     = 0x00000010
+pdf       = 0x00000020
+cdf       = 0x00000040
+dist      = 0x00000080
+nozero    = 0x00000100
+nonan     = 0x00000200
diff --git a/util/stats/info.py b/util/stats/info.py
new file mode 100644 (file)
index 0000000..a94563c
--- /dev/null
@@ -0,0 +1,724 @@
+from __future__ import division
+import operator, re, types
+
+# Provider of the statistics data: this module calls source.data(stat, bins)
+# and reads source.stattop.  It starts out as None, so it presumably has to
+# be assigned by the importing code before any value is fetched -- confirm.
+source = None
+# Run key that FormulaStat.__float__() and FormulaStat.display() use to pick
+# an entry out of a per-run value dictionary.
+display_run = 0
+
+def issequence(t):
+    return isinstance(t, types.TupleType) or isinstance(t, types.ListType)
+
+def total(f):
+    if isinstance(f, FormulaStat):
+        v = f.value
+    else:
+        v = f
+
+    f = FormulaStat()
+    if issequence(v):
+        f.value = reduce(operator.add, v)
+    else:
+        f.value = v
+
+    return f
+
+def unaryop(op, f):
+    if isinstance(f, FormulaStat):
+        v = f.value
+    else:
+        v = f
+
+    if issequence(v):
+        return map(op, v)
+    else:
+        return op(v)
+
+def zerodiv(lv, rv):
+    if rv == 0.0:
+        return 0.0
+    else:
+        return operator.truediv(lv, rv)
+
+def wrapop(op, lv, rv):
+    if isinstance(lv, str):
+        return lv
+
+    if isinstance(rv, str):
+        return rv
+
+    return op(lv, rv)
+
+def same(lv, rv):
+    for lrun,rrun in zip(lv.keys(),rv.keys()):
+        if lrun != rrun:
+            print 'lrun != rrun'
+            print lrun, rrun
+            print lv.keys()
+            print rv.keys()
+            return False
+        for lx,rx in zip(lv[lrun].keys(),rv[rrun].keys()):
+            if lx != rx:
+                print 'lx != rx'
+                print lx, rx
+                print lv[lrun].keys()
+                print rv[rrun].keys()
+                return False
+            for ly,ry in zip(lv[lrun][lx].keys(),rv[rrun][rx].keys()):
+                if ly != ry:
+                    print 'ly != ry'
+                    print ly, ry
+                    print lv[lrun][lx].keys()
+                    print rv[rrun][rx].keys()
+                    return False
+    return True
+
+
+def binaryop(op, lf, rf):
+    result = {}
+
+    if isinstance(lf, FormulaStat) and isinstance(rf, FormulaStat):
+        lv = lf.value
+        rv = rf.value
+
+        if not same(lv, rv):
+            raise AttributeError, "run,x,y not identical"
+
+        for run in lv.keys():
+            result[run] = {}
+            for x in lv[run].keys():
+                result[run][x] = {}
+                for y in lv[run][x].keys():
+                    result[run][x][y] = wrapop(op, lv[run][x][y],
+                                               rv[run][x][y])
+    elif isinstance(lf, FormulaStat):
+        lv = lf.value
+        for run in lv.keys():
+            result[run] = {}
+            for x in lv[run].keys():
+                result[run][x] = {}
+                for y in lv[run][x].keys():
+                    result[run][x][y] = wrapop(op, lv[run][x][y], rf)
+    elif isinstance(rf, FormulaStat):
+        rv = rf.value
+        for run in rv.keys():
+            result[run] = {}
+            for x in rv[run].keys():
+                result[run][x] = {}
+                for y in rv[run][x].keys():
+                    result[run][x][y] = wrapop(op, lf, rv[run][x][y])
+
+    return result
+
+def sums(x, y):
+    if issequence(x):
+        return map(lambda x, y: x + y, x, y)
+    else:
+        return x + y
+
+def alltrue(list):
+    return reduce(lambda x, y: x and y, list)
+
+def allfalse(list):
+    return not reduce(lambda x, y: x or y, list)
+
+def enumerate(list):
+    return map(None, range(len(list)), list)
+
+def cmp(a, b):
+    if a < b:
+        return -1
+    elif a == b:
+        return 0
+    else:
+        return 1
+
+class Statistic(object):
+    def __init__(self, data):
+        self.__dict__.update(data.__dict__)
+        if not self.__dict__.has_key('value'):
+            self.__dict__['value'] = None
+        if not self.__dict__.has_key('bins'):
+            self.__dict__['bins'] = None
+        if not self.__dict__.has_key('ticks'):
+            self.__dict__['ticks'] = None
+
+    def __getattribute__(self, attr):
+        if attr == 'value':
+            if self.__dict__['value'] == None:
+                self.__dict__['value'] = self.getValue()
+            return self.__dict__['value']
+        else:
+            return super(Statistic, self).__getattribute__(attr)
+
+    def __setattr__(self, attr, value):
+        if attr == 'bins' or attr == 'ticks':
+            if attr == 'bins':
+                global db
+                if value is not None:
+                    value = db.getBin(value)
+            elif attr == 'samples' and type(value) is str:
+                value = [ int(x) for x in value.split() ]
+
+            self.__dict__[attr] = value
+            self.__dict__['value'] = None
+        else:
+            super(Statistic, self).__setattr__(attr, value)
+
+    def getValue(self):
+        raise AttributeError, 'getValue() must be defined'
+
+    def zero(self):
+        return False
+
+    def __ne__(self, other):
+        return not (self == other)
+
+    def __str__(self):
+        return '%f' % (float(self))
+
+class FormulaStat(object):
+    def __add__(self, other):
+        f = FormulaStat()
+        f.value = binaryop(operator.add, self, other)
+        return f
+    def __sub__(self, other):
+        f = FormulaStat()
+        f.value = binaryop(operator.sub, self, other)
+        return f
+    def __mul__(self, other):
+        f = FormulaStat()
+        f.value = binaryop(operator.mul, self, other)
+        return f
+    def __truediv__(self, other):
+        f = FormulaStat()
+        f.value = binaryop(zerodiv, self, other)
+        return f
+    def __mod__(self, other):
+        f = FormulaStat()
+        f.value = binaryop(operator.mod, self, other)
+        return f
+    def __radd__(self, other):
+        f = FormulaStat()
+        f.value = binaryop(operator.add, other, self)
+        return f
+    def __rsub__(self, other):
+        f = FormulaStat()
+        f.value = binaryop(operator.sub, other, self)
+        return f
+    def __rmul__(self, other):
+        f = FormulaStat()
+        f.value = binaryop(operator.mul, other, self)
+        return f
+    def __rtruediv__(self, other):
+        f = FormulaStat()
+        f.value = binaryop(zerodiv, other, self)
+        return f
+    def __rmod__(self, other):
+        f = FormulaStat()
+        f.value = binaryop(operator.mod, other, self)
+        return f
+    def __neg__(self):
+        f = FormulaStat()
+        f.value = unaryop(operator.neg, self)
+        return f
+    def __getitem__(self, idx):
+        f = FormulaStat()
+        f.value = {}
+        for key in self.value.keys():
+            f.value[key] = {}
+            f.value[key][0] = {}
+            f.value[key][0][0] = self.value[key][idx][0]
+        return f
+
+    def __float__(self):
+        if isinstance(self.value, FormulaStat):
+            return float(self.value)
+        if not self.value.has_key(display_run):
+            return (1e300*1e300)
+        if len(self.value[display_run]) == 1:
+            return self.value[display_run][0][0]
+        else:
+            #print self.value[display_run]
+            return self.value[display_run][4][0]
+            #raise ValueError
+
+    def display(self):
+        import display
+        d = display.VectorDisplay()
+        d.flags = 0
+        d.precision = 1
+        d.name = 'formula'
+        d.desc = 'formula'
+        val = self.value[display_run]
+        d.value = [ val[x][0] for x in val.keys() ]
+        d.display()
+
+
+class Scalar(Statistic,FormulaStat):
+    """A statistic that is displayed as a single number."""
+    def getValue(self):
+        # fetch the data for this stat, restricted to the current bins
+        return source.data(self, self.bins)
+
+    def display(self):
+        import display
+        p = display.Print()
+        p.name = self.name
+        p.desc = self.desc
+        p.value = float(self)
+        p.flags = self.flags
+        p.precision = self.precision
+        # NOTE(review): 'flags' is not imported in this file as shown, and
+        # no definition of display.all is visible -- confirm where both are
+        # provided.
+        if display.all or (self.flags & flags.printable):
+            p.display()
+
+    def comparable(self, other):
+        return self.name == other.name
+
+    def __eq__(self, other):
+        return self.value == other.value
+
+    # NOTE(review): the in-place operators below apply -=, += and /=
+    # directly to 'value'; that presumably expects a plain number rather
+    # than the nested dictionary used by FormulaStat -- confirm.
+    def __isub__(self, other):
+        self.value -= other.value
+        return self
+
+    def __iadd__(self, other):
+        self.value += other.value
+        return self
+
+    def __itruediv__(self, other):
+        # dividing by a false value (0, None, ...) is a no-op
+        if not other:
+            return self
+        self.value /= other
+        return self
+
+class Vector(Statistic,FormulaStat):
+    def getValue(self):
+        return source.data(self, self.bins);
+
+    def display(self):
+        import display
+        if not display.all and not (self.flags & flags.printable):
+            return
+
+        d = display.VectorDisplay()
+        d.__dict__.update(self.__dict__)
+        d.display()
+
+    def comparable(self, other):
+        return self.name == other.name and \
+               len(self.value) == len(other.value)
+
+    def __eq__(self, other):
+        if issequence(self.value) != issequence(other.value):
+            return false
+
+        if issequence(self.value):
+            if len(self.value) != len(other.value):
+                return False
+            else:
+                for v1,v2 in zip(self.value, other.value):
+                    if v1 != v2:
+                        return False
+                return True
+        else:
+            return self.value == other.value
+
+    def __isub__(self, other):
+        self.value = binaryop(operator.sub, self.value, other.value)
+        return self
+
+    def __iadd__(self, other):
+        self.value = binaryop(operator.add, self.value, other.value)
+        return self
+
+    def __itruediv__(self, other):
+        if not other:
+            return self
+        if issequence(self.value):
+            for i in xrange(len(self.value)):
+                self.value[i] /= other
+        else:
+            self.value /= other
+        return self
+
+class Formula(Vector):
+    """A statistic whose value is computed from a formula string."""
+    def getValue(self):
+        # ':' is replaced by '__' before evaluation, presumably so that
+        # stat names containing ':' become valid identifiers -- confirm.
+        formula = re.sub(':', '__', self.formula)
+        # NOTE(review): eval() runs the formula text as Python code with
+        # source.stattop as its globals; the formula must come from a
+        # trusted place.
+        x = eval(formula, source.stattop)
+        return x.value
+
+    def comparable(self, other):
+        # NOTE(review): 'compare' is not defined anywhere in this file as
+        # shown, and nothing visible assigns 'dist' on a Formula -- as
+        # written this raises when the names match.  Confirm the intent.
+        return self.name == other.name and \
+               compare(self.dist, other.dist)
+
+    def __eq__(self, other):
+        return self.value == other.value
+
+    # The in-place operators deliberately leave a formula untouched.
+    def __isub__(self, other):
+        return self
+
+    def __iadd__(self, other):
+        return self
+
+    def __itruediv__(self, other):
+        if not other:
+            return self
+        return self
+
+class SimpleDist(object):
+    def __init__(self, sums, squares, samples):
+        self.sums = sums
+        self.squares = squares
+        self.samples = samples
+
+    def getValue(self):
+        return 0.0
+
+    def display(self, name, desc, flags, precision):
+        import display
+        p = display.Print()
+        p.flags = flags
+        p.precision = precision
+
+        if self.samples > 0:
+            p.name = name + ".mean"
+            p.value = self.sums / self.samples
+            p.display()
+
+            p.name = name + ".stdev"
+            if self.samples > 1:
+                var = (self.samples * self.squares - self.sums ** 2) \
+                      / (self.samples * (self.samples - 1))
+                if var >= 0:
+                    p.value = math.sqrt(var)
+                else:
+                    p.value = 'NaN'
+            else:
+                p.value = 0.0
+            p.display()
+
+        p.name = name + ".samples"
+        p.value = self.samples
+        p.display()
+
+    def comparable(self, other):
+        return True
+
+    def __eq__(self, other):
+        return self.sums == other.sums and self.squares == other.squares and \
+               self.samples == other.samples
+
+    def __isub__(self, other):
+        self.sums -= other.sums
+        self.squares -= other.squares
+        self.samples -= other.samples
+        return self
+
+    def __iadd__(self, other):
+        self.sums += other.sums
+        self.squares += other.squares
+        self.samples += other.samples
+        return self
+
+    def __itruediv__(self, other):
+        if not other:
+            return self
+        self.sums /= other
+        self.squares /= other
+        self.samples /= other
+        return self
+
+class FullDist(SimpleDist):
+    def __init__(self, sums, squares, samples, minval, maxval,
+                 under, vec, over, min, max, bsize, size):
+        self.sums = sums
+        self.squares = squares
+        self.samples = samples
+        self.minval = minval
+        self.maxval = maxval
+        self.under = under
+        self.vec = vec
+        self.over = over
+        self.min = min
+        self.max = max
+        self.bsize = bsize
+        self.size = size
+
+    def getValue(self):
+        return 0.0
+
+    def display(self, name, desc, flags, precision):
+        import display
+        p = display.Print()
+        p.flags = flags
+        p.precision = precision
+
+        p.name = name + '.min_val'
+        p.value = self.minval
+        p.display()
+
+        p.name = name + '.max_val'
+        p.value = self.maxval
+        p.display()
+
+        p.name = name + '.underflow'
+        p.value = self.under
+        p.display()
+
+        i = self.min
+        for val in self.vec[:-1]:
+            p.name = name + '[%d:%d]' % (i, i + self.bsize - 1)
+            p.value = val
+            p.display()
+            i += self.bsize
+
+        p.name = name + '[%d:%d]' % (i, self.max)
+        p.value = self.vec[-1]
+        p.display()
+
+
+        p.name = name + '.overflow'
+        p.value = self.over
+        p.display()
+
+        SimpleDist.display(self, name, desc, flags, precision)
+
+    def comparable(self, other):
+        return self.min == other.min and self.max == other.max and \
+               self.bsize == other.bsize and self.size == other.size
+
+    def __eq__(self, other):
+        return self.sums == other.sums and self.squares == other.squares and \
+               self.samples == other.samples
+
+    def __isub__(self, other):
+        self.sums -= other.sums
+        self.squares -= other.squares
+        self.samples -= other.samples
+
+        if other.samples:
+            self.minval = min(self.minval, other.minval)
+            self.maxval = max(self.maxval, other.maxval)
+            self.under -= under
+            self.vec = map(lambda x,y: x - y, self.vec, other.vec)
+            self.over -= over
+        return self
+
+    def __iadd__(self, other):
+        if not self.samples and other.samples:
+            self = other
+            return self
+
+        self.sums += other.sums
+        self.squares += other.squares
+        self.samples += other.samples
+
+        if other.samples:
+            self.minval = min(self.minval, other.minval)
+            self.maxval = max(self.maxval, other.maxval)
+            self.under += other.under
+            self.vec = map(lambda x,y: x + y, self.vec, other.vec)
+            self.over += other.over
+        return self
+
+    def __itruediv__(self, other):
+        if not other:
+            return self
+        self.sums /= other
+        self.squares /= other
+        self.samples /= other
+
+        if self.samples:
+            self.under /= other
+            for i in xrange(len(self.vec)):
+                self.vec[i] /= other
+            self.over /= other
+        return self
+
+class Dist(Statistic):
+    def getValue(self):
+        return 0.0
+
+    def display(self):
+        import display
+        if not display.all and not (self.flags & flags.printable):
+            return
+
+        self.dist.display(self.name, self.desc, self.flags, self.precision)
+
+    def comparable(self, other):
+        return self.name == other.name and \
+               self.dist.compareable(other.dist)
+
+    def __eq__(self, other):
+        return self.dist == other.dist
+
+    def __isub__(self, other):
+        self.dist -= other.dist
+        return self
+
+    def __iadd__(self, other):
+        self.dist += other.dist
+        return self
+
+    def __itruediv__(self, other):
+        if not other:
+            return self
+        self.dist /= other
+        return self
+
+class VectorDist(Statistic):
+    def getValue(self):
+        return 0.0
+
+    def display(self):
+        import display
+        if not display.all and not (self.flags & flags.printable):
+            return
+
+        if isinstance(self.dist, SimpleDist):
+            return
+
+        for dist,sn,sd,i in map(None, self.dist, self.subnames, self.subdescs,
+                                range(len(self.dist))):
+            if len(sn) > 0:
+                name = '%s.%s' % (self.name, sn)
+            else:
+                name = '%s[%d]' % (self.name, i)
+
+            if len(sd) > 0:
+                desc = sd
+            else:
+                desc = self.desc
+
+            dist.display(name, desc, self.flags, self.precision)
+
+        if (self.flags & flags.total) or 1:
+            if isinstance(self.dist[0], SimpleDist):
+                disttotal = SimpleDist( \
+                    reduce(sums, [d.sums for d in self.dist]),
+                    reduce(sums, [d.squares for d in self.dist]),
+                    reduce(sums, [d.samples for d in self.dist]))
+            else:
+                disttotal = FullDist( \
+                    reduce(sums, [d.sums for d in self.dist]),
+                    reduce(sums, [d.squares for d in self.dist]),
+                    reduce(sums, [d.samples for d in self.dist]),
+                    min([d.minval for d in self.dist]),
+                    max([d.maxval for d in self.dist]),
+                    reduce(sums, [d.under for d in self.dist]),
+                    reduce(sums, [d.vec for d in self.dist]),
+                    reduce(sums, [d.over for d in self.dist]),
+                    dist[0].min,
+                    dist[0].max,
+                    dist[0].bsize,
+                    dist[0].size)
+
+            name = '%s.total' % (self.name)
+            desc = self.desc
+            disttotal.display(name, desc, self.flags, self.precision)
+
+    def comparable(self, other):
+        return self.name == other.name and \
+               alltrue(map(lambda x, y : x.comparable(y),
+                           self.dist,
+                           other.dist))
+
+    def __eq__(self, other):
+        return alltrue(map(lambda x, y : x == y, self.dist, other.dist))
+
+    def __isub__(self, other):
+        if issequence(self.dist) and issequence(other.dist):
+            for sd,od in zip(self.dist, other.dist):
+                sd -= od
+        else:
+            self.dist -= other.dist
+        return self
+
+    def __iadd__(self, other):
+        if issequence(self.dist) and issequence(other.dist):
+            for sd,od in zip(self.dist, other.dist):
+                sd += od
+        else:
+            self.dist += other.dist
+        return self
+
+    def __itruediv__(self, other):
+        if not other:
+            return self
+        if issequence(self.dist):
+            for dist in self.dist:
+                dist /= other
+        else:
+            self.dist /= other
+        return self
+
+class Vector2d(Statistic):
+    def getValue(self):
+        return 0.0
+
+    def display(self):
+        import display
+        if not display.all and not (self.flags & flags.printable):
+            return
+
+        d = display.VectorDisplay()
+        d.__dict__.update(self.__dict__)
+
+        if self.__dict__.has_key('ysubnames'):
+            ysubnames = list(self.ysubnames)
+            slack = self.x - len(ysubnames)
+            if slack > 0:
+                ysubnames.extend(['']*slack)
+        else:
+            ysubnames = range(self.x)
+
+        for x,sname in enumerate(ysubnames):
+            o = x * self.y
+            d.value = self.value[o:o+self.y]
+            d.name = '%s[%s]' % (self.name, sname)
+            d.display()
+
+        if self.flags & flags.total:
+            d.value = []
+            for y in range(self.y):
+                xtot = 0.0
+                for x in range(self.x):
+                    xtot += self.value[y + x * self.x]
+                d.value.append(xtot)
+
+            d.name = self.name + '.total'
+            d.display()
+
+    def comparable(self, other):
+        return self.name == other.name and self.x == other.x and \
+               self.y == other.y
+
+    def __eq__(self, other):
+        return True
+
+    def __isub__(self, other):
+        return self
+
+    def __iadd__(self, other):
+        return self
+
+    def __itruediv__(self, other):
+        if not other:
+            return self
+        return self
+
+def NewStat(data):
+    stat = None
+    if data.type == 'SCALAR':
+        stat = Scalar(data)
+    elif data.type == 'VECTOR':
+        stat = Vector(data)
+    elif data.type == 'DIST':
+        stat = Dist(data)
+    elif data.type == 'VECTORDIST':
+        stat = VectorDist(data)
+    elif data.type == 'VECTOR2D':
+        stat = Vector2d(data)
+    elif data.type == 'FORMULA':
+        stat = Formula(data)
+
+    return stat
+
diff --git a/util/stats/print.py b/util/stats/print.py
new file mode 100644 (file)
index 0000000..f4492cd
--- /dev/null
@@ -0,0 +1,127 @@
+# NOTE(review): nothing in this file as shown reads 'all'; the classes below
+# test a name spelled 'display_all' instead -- confirm which one is meant.
+all = False
+# When true, Print.__str__ appends " # <desc>" to lines that have a
+# non-empty description.
+descriptions = False
+
+class Value:
+    def __init__(self, value, precision, percent = False):
+        self.value = value
+        self.precision = precision
+        self.percent = percent
+    def __str__(self):
+        if isinstance(self.value, str):
+            if self.value.lower() == 'nan':
+                value = 'NaN'
+            if self.value.lower() == 'inf':
+                value = 'Inf'
+        else:
+            if self.precision >= 0:
+                format = "%%.%df" % self.precision
+            elif self.value == 0.0:
+                format = "%.0f"
+            elif self.value % 1.0 == 0.0:
+                format = "%.0f"
+            else:
+                format = "%f"
+            value = self.value
+            if self.percent:
+                value = value * 100.0
+            value = format % value
+
+        if self.percent:
+            value = value + "%"
+
+        return value
+
+class Print:
+    """One line of statistics output.
+
+    Attributes are supplied as keyword arguments or assigned after
+    construction.  __str__ reads 'name', 'value' and 'precision' and, when
+    present, 'pdf', 'cdf' and 'desc'; doprint reads 'value' and 'flags'.
+    """
+    def __init__(self, **vals):
+        self.__dict__.update(vals)
+
+    def __str__(self):
+        # Layout: name, value, pdf and cdf in fixed-width columns; pdf and
+        # cdf are formatted as percentages with two decimals when set and
+        # left blank otherwise.
+        value = Value(self.value, self.precision)
+        pdf = ''
+        cdf = ''
+        if self.__dict__.has_key('pdf'):
+            pdf = Value(self.pdf, 2, True)
+        if self.__dict__.has_key('cdf'):
+            cdf = Value(self.cdf, 2, True)
+
+        output = "%-40s %12s %8s %8s" % (self.name, value, pdf, cdf)
+
+        # the description is only appended when the module-level
+        # 'descriptions' switch is on
+        if descriptions and self.__dict__.has_key('desc') and self.desc:
+            output = "%s # %s" % (output, self.desc)
+
+        return output
+
+    def doprint(self):
+        """Return True when this line should be printed."""
+        # NOTE(review): 'display_all', 'flags_nozero' and 'flags_nonan' are
+        # not defined anywhere in this file as shown; as written this method
+        # raises NameError unless they are provided from outside -- confirm.
+        if display_all:
+            return True
+        if self.value == 0.0 and (self.flags & flags_nozero):
+            return False
+        if isinstance(self.value, str):
+            if self.value == 'NaN' and (self.flags & flags_nonan):
+                return False
+        return True
+
+    def display(self):
+        """Print this line to stdout if doprint() allows it."""
+        if self.doprint():
+            print self
+
+class VectorDisplay:
+    """Print a statistic whose 'value' may be a single value or a sequence.
+
+    The caller assigns the attributes before calling display(): 'name',
+    'desc', 'flags', 'precision' and 'value' are always read; 'subnames'
+    and 'subdescs' are used when present.
+    """
+    # NOTE(review): 'issequence', 'display_all', 'flags_pdf' and
+    # 'flags_total' are not defined anywhere in this file as shown --
+    # confirm where they are expected to come from.
+    def display(self):
+        # A single Print object is reused for every line that is emitted.
+        p = Print()
+        p.flags = self.flags
+        p.precision = self.precision
+
+        if issequence(self.value):
+            if not len(self.value):
+                return
+
+            mytotal = reduce(lambda x,y: float(x) + float(y), self.value)
+            mycdf = 0.0
+
+            value = self.value
+
+            # Default sub-names: '[i]' when everything is displayed,
+            # otherwise empty (an empty sub-name suppresses the element).
+            if display_all:
+                subnames = [ '[%d]' % i for i in range(len(value)) ]
+            else:
+                subnames = [''] * len(value)
+
+            # Non-empty entries of self.subnames override the defaults.
+            if self.__dict__.has_key('subnames'):
+                for i,each in enumerate(self.subnames):
+                    if len(each) > 0:
+                        subnames[i] = '.%s' % each
+
+            # Each element inherits the vector's description unless
+            # self.subdescs supplies one at the same index.
+            subdescs = [self.desc]*len(value)
+            if self.__dict__.has_key('subdescs'):
+                for i in xrange(min(len(value), len(self.subdescs))):
+                    subdescs[i] = self.subdescs[i]
+
+            for val,sname,sdesc in map(None, value, subnames, subdescs):
+                # The running cdf is advanced before the empty-name check,
+                # so skipped elements still count towards it.
+                if mytotal > 0.0:
+                    mypdf = float(val) / float(mytotal)
+                    mycdf += mypdf
+                    if (self.flags & flags_pdf):
+                        p.pdf = mypdf
+                        p.cdf = mycdf
+
+                if len(sname) == 0:
+                    continue
+
+                p.name = self.name + sname
+                p.desc = sdesc
+                p.value = val
+                p.display()
+
+            # The '.total' line is printed without pdf/cdf columns.
+            if (self.flags & flags_total):
+                if (p.__dict__.has_key('pdf')): del p.__dict__['pdf']
+                if (p.__dict__.has_key('cdf')): del p.__dict__['cdf']
+                p.name = self.name + '.total'
+                p.desc = self.desc
+                p.value = mytotal
+                p.display()
+
+        else:
+            # Not a sequence: print it as one plain line.
+            p.name = self.name
+            p.desc = self.desc
+            p.value = self.value
+            p.display()
+
diff --git a/util/stats/stats.py b/util/stats/stats.py
new file mode 100755 (executable)
index 0000000..1d521fd
--- /dev/null
@@ -0,0 +1,478 @@
+#!/usr/bin/env python
+from __future__ import division
+import re, sys
+
+def usage():
+    """Print the command-line synopsis to stdout and exit with status 1."""
+    # The flag list mirrors the getopt string parsed in the __main__
+    # section ('-BEFGd:g:h:pr:s:u:'), including -B and -G.
+    print '''\
+Usage: %s [-B] [-E] [-F] [-G] [-d <db> ] [-g <get> ] [-h <host>] [-p]
+       [-s <system>] [-r <runs> ] [-u <username>] <command> [command args]
+''' % sys.argv[0]
+    sys.exit(1)
+
+def getopts(list, flags):
+    """Parse the argument vector `list` against the getopt spec `flags`.
+
+    Returns the (options, remaining arguments) pair produced by
+    getopt.getopt().  A malformed command line is reported through
+    usage(), which prints the synopsis and exits.
+    """
+    from getopt import getopt, GetoptError
+
+    try:
+        parsed = getopt(list, flags)
+    except GetoptError:
+        usage()
+
+    opts, args = parsed
+    return opts, args
+
+def printval(name, value, invert = False):
+    """Print one 'name:    value' line to stdout.
+
+    name   -- label printed before the colon
+    value  -- number to print; an infinite value prints nothing
+    invert -- when true and value is non-zero, print 1 / value instead
+
+    The number format is chosen by printval.mode: 'G' uses %g, 'F' always
+    uses %f, and any other mode uses %0.5e for values above 1e6 and %f
+    otherwise.
+    """
+    if invert and value != 0.0:
+        value = 1 / value
+
+    # 1e300*1e300 overflows to +infinity; such values are not printed.
+    infinity = 1e300*1e300
+    if value == infinity:
+        return
+
+    mode = printval.mode
+    if mode == 'G':
+        fmt = '%s:    %g'
+    elif mode != 'F' and value > 1e6:
+        fmt = '%s:    %0.5e'
+    else:
+        fmt = '%s:    %f'
+
+    print fmt % (name, value)
+
+# Default output mode; the -E and -F command-line flags override it.
+printval.mode = 'G'
+
+def unique(list):
+    """Return a list holding each distinct item of `list` exactly once.
+
+    The items must be hashable.  The order of the result is whatever
+    order iterating the intermediate dictionary yields.
+    """
+    # dict.fromkeys() de-duplicates directly.  The previous form called
+    # map() only for its side effects and depended on map() padding an
+    # empty second sequence with None.
+    #
+    # The parameter name shadows the builtin list(), so the keys are
+    # copied with a comprehension instead.
+    return [ item for item in dict.fromkeys(list) ]
+
+def graphdata(runs, tag, label, value):
+    """Write graph data files for `value`, one per benchmark combination.
+
+    For every (benchmark, dma, cache, system, checkpoint) combination
+    built from the lists below, a file named '<tag>.<base>.dat' is written
+    under the hard-coded graph data directory.  Each file has a small
+    header followed by one row per CPU speed, with one column per entry
+    of `configs`.
+
+    runs  -- not used by this function; the runs are looked up by name in
+             info.source.allRunNames
+    tag   -- prefix of every output file name
+    label -- y-axis label written into the file header
+    value -- object converted with float() once per run, after
+             info.display_run has been set to that run's id
+    """
+    import info
+    configs = ['std', 'csa', 'ht1', 'ht4', 'htx', 'ocm', 'occ', 'ocp' ]
+    benchmarks = [ 'm', 's' ]
+    dmas = [ 'x', 'd', 'b' ]
+    caches = [ '1', '2', '3', '4', '5' ]
+    systems = [ 'M' ]
+    checkpoints = [ '1' ]
+
+    # The loop variable is called `system` rather than `sys` so that it
+    # does not shadow the sys module inside this function.
+    names = [ [bench, dma, cache, system, cpt]
+              for bench in benchmarks
+              for dma in dmas
+              for cache in caches
+              for system in systems
+              for cpt in checkpoints ]
+
+    for bench,dma,cache,system,cpt in names:
+        base = '%s.%s.%s.%s.%s' % (bench, dma, cache, system, cpt)
+        fname = '/n/ziff/z/binkertn/graph/data.ibm/%s.%s.dat' % (tag, base)
+        f = open(fname, 'w')
+        # try/finally guarantees the file is closed even if a run lookup
+        # or a float conversion raises part-way through.
+        try:
+            print >>f, '#set TITLE = %s' % base
+            print >>f, '#set xlbl = Configuration'
+            print >>f, '#set ylbl = %s' % label
+            print >>f, '#set sublabels = %s' % ' '.join(configs)
+
+            for speed,freq in zip(['s', 'q'],['4GHz','10GHz']):
+                print >>f, '"%s"' % freq,
+                for conf in configs:
+                    name = '%s.%s.%s.%s.%s.%s.%s' % (conf, bench, dma, speed,
+                                                     cache, system, cpt)
+                    run = info.source.allRunNames[name]
+                    info.display_run = run.run
+                    val = float(value)
+                    # Infinite results are written as 0.0.
+                    if val == 1e300*1e300:
+                        print >>f, 0.0,
+                    else:
+                        print >>f, "%f" % val,
+                print >>f
+        finally:
+            f.close()
+
+def printdata(runs, value, invert = False):
+    """Print `value` once for every run in `runs`.
+
+    runs   -- iterable of run objects providing .run and .name
+    value  -- object converted with float() once per run, after
+              info.display_run has been set to that run's id
+    invert -- forwarded to printval(), which then prints the reciprocal
+              of each non-zero value
+    """
+    import info
+    for run in runs:
+        info.display_run = run.run
+        val = float(value)
+        # invert was previously dropped here, so callers asking for the
+        # inverted form (e.g. 'tpp', 'tpb') got the plain value.
+        printval(run.name, val, invert)
+
+class CommandException(Exception):
+    """Raised by commands() for an unknown command or bad arguments."""
+
+def commands(options, command, args):
+    """Execute one stats sub-command.
+
+    options -- object carrying the host, db, passwd, user, system, get
+               and runs attributes, plus optional graph / binned booleans
+    command -- name of the sub-command to run
+    args    -- list of the remaining command-line arguments
+
+    The 'database' command is handled through dbinit.MyDB; every other
+    command connects to the statistics database first and then prints or
+    graphs the requested statistic for the selected runs.
+
+    Raises CommandException when the command is unknown or is given
+    arguments it does not accept.
+    """
+    if command == 'database':
+        if len(args) == 0: raise CommandException
+
+        import dbinit
+        mydb = dbinit.MyDB(options)
+
+        if args[0] == 'drop':
+            if len(args) > 2: raise CommandException
+            mydb.admin()
+            mydb.drop()
+            if len(args) == 2 and args[1] == 'init':
+                mydb.create()
+                mydb.connect()
+                mydb.populate()
+            mydb.close()
+            return
+
+        if args[0] == 'init':
+            if len(args) > 1: raise CommandException
+            mydb.admin()
+            mydb.create()
+            mydb.connect()
+            mydb.populate()
+            mydb.close()
+            return
+
+        if args[0] == 'clean':
+            if len(args) > 1: raise CommandException
+            mydb.connect()
+            mydb.clean()
+            return
+
+        raise CommandException
+
+    import db, info
+    info.source = db.Database()
+    info.source.host = options.host
+    info.source.db = options.db
+    info.source.passwd = options.passwd
+    info.source.user = options.user
+    info.source.connect()
+    # Makes names from the database (e.g. sim_ticks, sim_seconds used
+    # below) available as module globals.
+    info.source.update_dict(globals())
+
+    system = info.source.__dict__[options.system]
+
+    if type(options.get) is str:
+        info.source.get = options.get
+
+    # Select every run, or only those whose name matches the -r regex.
+    if options.runs is None:
+        runs = info.source.allRuns
+    else:
+        rx = re.compile(options.runs)
+        runs = []
+        for run in info.source.allRuns:
+            if rx.match(run.name):
+                runs.append(run)
+
+    info.display_run = runs[0].run
+
+    # The command-line parser records -G / -B on the options object.
+    # Honour those, and fall back to the module-level defaults when the
+    # caller did not set them.
+    use_graph = getattr(options, 'graph', graph)
+    use_bins = getattr(options, 'binned', binned)
+
+    if command == 'runs':
+        user = None
+        # 'u:' so that -u consumes the user name that follows it.
+        opts, args = getopts(args, '-u:')
+        if len(args):
+            raise CommandException
+        for o,a in opts:
+            if o == '-u':
+                user = a
+        info.source.listRuns(user)
+        return
+
+    if command == 'stats':
+        if len(args) == 0:
+            info.source.listStats()
+        elif len(args) == 1:
+            info.source.listStats(args[0])
+        else:
+            raise CommandException
+
+        return
+
+    if command == 'stat':
+        if len(args) != 1:
+            raise CommandException
+
+        stats = info.source.getStat(args[0])
+        for stat in stats:
+            if use_graph:
+                graphdata(runs, stat.name, stat.name, stat)
+            else:
+                print stat.name
+                printdata(runs, stat)
+        return
+
+    if command == 'bins':
+        if len(args) == 0:
+            info.source.listBins()
+        elif len(args) == 1:
+            info.source.listBins(args[0])
+        else:
+            raise CommandException
+
+        return
+
+    if command == 'formulas':
+        if len(args) == 0:
+            info.source.listFormulas()
+        elif len(args) == 1:
+            info.source.listFormulas(args[0])
+        else:
+            raise CommandException
+
+        return
+
+    if command == 'samples':
+        if len(args):
+            raise CommandException
+
+        info.source.listTicks(runs)
+        return
+
+    # None of the remaining commands accepts arguments.
+    if len(args):
+        raise CommandException
+
+    if command == 'usertime':
+        import copy
+        kernel = copy.copy(system.full_cpu.numCycles)
+        kernel.bins = 'kernel'
+
+        user = copy.copy(system.full_cpu.numCycles)
+        user.bins = 'user'
+
+        if use_graph:
+            graphdata(runs, 'usertime', 'User Fraction',
+                      user / system.full_cpu.numCycles)
+        else:
+            printdata(runs, user / system.full_cpu.numCycles)
+        return
+
+    if command == 'ticks':
+        if use_bins:
+            print 'kernel ticks'
+            system.full_cpu.numCycles.bins = 'kernel'
+            printdata(runs, system.full_cpu.numCycles)
+
+            print 'idle ticks'
+            system.full_cpu.numCycles.bins = 'idle'
+            printdata(runs, system.full_cpu.numCycles)
+
+            print 'user ticks'
+            system.full_cpu.numCycles.bins = 'user'
+            printdata(runs, system.full_cpu.numCycles)
+
+            print 'total ticks'
+
+        system.full_cpu.numCycles.bins = None
+        printdata(runs, system.full_cpu.numCycles)
+        return
+
+    if command == 'packets':
+        packets = system.tsunami.nsgige.rxPackets
+        if use_graph:
+            graphdata(runs, 'packets', 'Packets', packets)
+        else:
+            printdata(runs, packets)
+        return
+
+    if command == 'ppt' or command == 'tpp':
+        ppt = system.tsunami.nsgige.rxPackets / sim_ticks
+        printdata(runs, ppt, command == 'tpp')
+        return
+
+    if command == 'pps':
+        pps = system.tsunami.nsgige.rxPackets / sim_seconds
+        if use_graph:
+            graphdata(runs, 'pps', 'Packets/s', pps)
+        else:
+            printdata(runs, pps)
+        return
+
+    if command == 'bpt' or command == 'tpb':
+        bytes = system.tsunami.nsgige.rxBytes + system.tsunami.nsgige.txBytes
+        bpt = bytes / sim_ticks * 8
+        if use_graph:
+            graphdata(runs, 'bpt', 'bps / Hz', bpt)
+        else:
+            printdata(runs, bpt, command == 'tpb')
+        return
+
+    if command == 'bptb' or command == 'tpbb':
+        bytes = system.tsunami.nsgige.rxBytes + system.tsunami.nsgige.txBytes
+
+        # NOTE(review): `ticks` is not defined anywhere in this function;
+        # unless update_dict() above injects a global of that name this
+        # raises NameError.  The unbinned 'bpt' command divides by
+        # sim_ticks -- confirm which was intended.
+        print 'kernel stats'
+        bytes.bins = 'kernel'
+        printdata(runs, bytes / ticks)
+
+        print 'idle stats'
+        bytes.bins = 'idle'
+        printdata(runs, bytes / ticks)
+
+        print 'user stats'
+        bytes.bins = 'user'
+        printdata(runs, bytes / ticks)
+
+        return
+
+    if command == 'bytes':
+        stat = system.tsunami.nsgige.rxBytes + system.tsunami.nsgige.txBytes
+
+        if use_bins:
+            print '%s kernel stats' % stat.name
+            stat.bins = 'kernel'
+            printdata(runs, stat)
+
+            print '%s idle stats' % stat.name
+            stat.bins = 'idle'
+            printdata(runs, stat)
+
+            print '%s user stats' % stat.name
+            stat.bins = 'user'
+            printdata(runs, stat)
+
+            print '%s total stats' % stat.name
+            stat.bins = None
+
+        printdata(runs, stat)
+        return
+
+    if command == 'rxbps':
+        gbps = system.tsunami.nsgige.rxBandwidth / 1e9
+        if use_graph:
+            graphdata(runs, 'rxbps', 'Bandwidth (Gbps)',  gbps)
+        else:
+            printdata(runs, gbps)
+        return
+
+    if command == 'txbps':
+        gbps = system.tsunami.nsgige.txBandwidth / 1e9
+        if use_graph:
+            graphdata(runs, 'txbps', 'Bandwidth (Gbps)',  gbps)
+        else:
+            printdata(runs, gbps)
+        return
+
+    if command == 'bps':
+        rxbps = system.tsunami.nsgige.rxBandwidth
+        txbps = system.tsunami.nsgige.txBandwidth
+        gbps = (rxbps + txbps) / 1e9
+        if use_graph:
+            graphdata(runs, 'bps', 'Bandwidth (Gbps)',  gbps)
+        else:
+            printdata(runs, gbps)
+        return
+
+    if command == 'misses':
+        stat = system.L3.overall_mshr_misses
+        if use_bins:
+            print '%s kernel stats' % stat.name
+            stat.bins = 'kernel'
+            printdata(runs, stat)
+
+            print '%s idle stats' % stat.name
+            stat.bins = 'idle'
+            printdata(runs, stat)
+
+            print '%s user stats' % stat.name
+            stat.bins = 'user'
+            printdata(runs, stat)
+
+            print '%s total stats' % stat.name
+
+        stat.bins = None
+        if use_graph:
+            graphdata(runs, 'misses', 'Overall MSHR Misses', stat)
+        else:
+            printdata(runs, stat)
+        return
+
+    if command == 'mpkb':
+        misses = system.L3.overall_mshr_misses
+        rxbytes = system.tsunami.nsgige.rxBytes
+        txbytes = system.tsunami.nsgige.txBytes
+
+        if use_bins:
+            print 'mpkb kernel stats'
+            misses.bins = 'kernel'
+            mpkb = misses / ((rxbytes + txbytes) / 1024)
+            printdata(runs, mpkb)
+
+            print 'mpkb idle stats'
+            misses.bins = 'idle'
+            mpkb = misses / ((rxbytes + txbytes) / 1024)
+            printdata(runs, mpkb)
+
+            print 'mpkb user stats'
+            misses.bins = 'user'
+            mpkb = misses / ((rxbytes + txbytes) / 1024)
+            printdata(runs, mpkb)
+
+            print 'mpkb total stats'
+
+        mpkb = misses / ((rxbytes + txbytes) / 1024)
+        misses.bins = None
+        if use_graph:
+            graphdata(runs, 'mpkb', 'Misses / KB',  mpkb)
+        else:
+            printdata(runs, mpkb)
+        return
+
+    if command == 'execute':
+        printdata(runs, system.full_cpu.ISSUE__count)
+        return
+
+    if command == 'commit':
+        printdata(runs, system.full_cpu.COM__count)
+        return
+
+    if command == 'fetch':
+        printdata(runs, system.full_cpu.FETCH__count)
+        return
+
+    if command == 'rxbpp':
+        bpp = system.tsunami.nsgige.rxBytes / system.tsunami.nsgige.rxPackets
+        # Pass the selected run list; `run` was either undefined or the
+        # last single run left over from the -r filter loop.
+        printdata(runs, 8 * bpp)
+        return
+
+    if command == 'txbpp':
+        bpp = system.tsunami.nsgige.txBytes / system.tsunami.nsgige.txPackets
+        printdata(runs, 8 * bpp)
+        return
+
+    raise CommandException
+
+
+# Module-level defaults for graph output and binned output; commands()
+# refers to these names when deciding how to report a statistic.
+graph = False
+binned = False
+
+class Options:
+    """Plain attribute container for the parsed command-line settings."""
+    pass
+
+# Script entry point: build the default options, apply the command-line
+# flags, then dispatch the first positional argument as the command.
+if __name__ == '__main__':
+    import getpass
+
+    options = Options()
+    options.host = 'zizzer.pool'
+    options.db = None
+    options.passwd = ''
+    options.user = getpass.getuser()
+    options.runs = None
+    options.system = 'client'
+    options.get = None
+    # Give -G / -B explicit defaults so the attributes always exist.
+    options.graph = False
+    options.binned = False
+
+    opts, args = getopts(sys.argv[1:], '-BEFGd:g:h:pr:s:u:')
+    for o,a in opts:
+        if o == '-B':
+            options.binned = True
+        elif o == '-E':
+            printval.mode = 'E'
+        elif o == '-F':
+            printval.mode = 'F'
+        elif o == '-G':
+            options.graph = True
+        elif o == '-d':
+            options.db = a
+        elif o == '-g':
+            options.get = a
+        elif o == '-h':
+            options.host = a
+        elif o == '-p':
+            # Prompt for the password instead of taking it on the
+            # command line.
+            options.passwd = getpass.getpass()
+        elif o == '-r':
+            options.runs = a
+        elif o == '-u':
+            options.user = a
+        elif o == '-s':
+            options.system = a
+
+    # Mirror the parsed flags into the module-level switches so code that
+    # reads `graph` / `binned` directly sees them; previously -G and -B
+    # were stored on options only and had no effect.
+    graph = options.graph
+    binned = options.binned
+
+    if len(args) == 0:
+        usage()
+
+    command = args[0]
+    args = args[1:]
+
+    try:
+        commands(options, command, args)
+    except CommandException:
+        usage()