Totally re-do/reorganize the python part of the statistics code

author Nathan Binkert <binkertn@umich.edu>

Tue, 10 Aug 2004 01:20:52 +0000 (21:20 -0400)

committer Nathan Binkert <binkertn@umich.edu>

Tue, 10 Aug 2004 01:20:52 +0000 (21:20 -0400)
author Nathan Binkert <binkertn@umich.edu>
Tue, 10 Aug 2004 01:20:52 +0000 (21:20 -0400)
committer Nathan Binkert <binkertn@umich.edu>
Tue, 10 Aug 2004 01:20:52 +0000 (21:20 -0400)
diff --git a/util/stats/db.py b/util/stats/db.py

new file mode 100644 (file)

index 0000000..4cba824
--- /dev/null
+++ b/util/stats/db.py
@@ -0,0 +1,415 @@
+import MySQLdb, re, string
+
+def statcmp(a, b):
+    v1 = a.split('.')
+    v2 = b.split('.')
+
+    last = min(len(v1), len(v2)) - 1
+    for i,j in zip(v1[0:last], v2[0:last]):
+        if i != j:
+            return cmp(i, j)
+
+    # Special compare for last element.
+    if len(v1) == len(v2):
+        return cmp(v1[last], v2[last])
+    else:
+        return cmp(len(v1), len(v2))
+
+class RunData:
+    def __init__(self, row):
+        self.run = int(row[0])
+        self.name = row[1]
+        self.user = row[2]
+        self.project = row[3]
+
+class SubData:
+    def __init__(self, row):
+        self.stat = int(row[0])
+        self.x = int(row[1])
+        self.y = int(row[2])
+        self.name = row[3]
+        self.descr = row[4]
+
+class Data:
+    def __init__(self, row):
+        if len(row) != 5:
+            raise 'stat db error'
+        self.stat = int(row[0])
+        self.run = int(row[1])
+        self.x = int(row[2])
+        self.y = int(row[3])
+        self.data = float(row[4])
+
+    def __repr__(self):
+        return '''Data(['%d', '%d', '%d', '%d', '%f'])''' % ( self.stat,
+            self.run, self.x, self.y, self.data)
+
+class StatData(object):
+    def __init__(self, row):
+        self.stat = int(row[0])
+        self.name = row[1]
+        self.desc = row[2]
+        self.type = row[3]
+        self.prereq = int(row[5])
+        self.precision = int(row[6])
+
+        import flags
+        self.flags = 0
+        if int(row[4]): self.flags |= flags.printable
+        if int(row[7]): self.flags |= flags.nozero
+        if int(row[8]): self.flags |= flags.nonan
+        if int(row[9]): self.flags |= flags.total
+        if int(row[10]): self.flags |= flags.pdf
+        if int(row[11]): self.flags |= flags.cdf
+
+        if self.type == 'DIST' or self.type == 'VECTORDIST':
+            self.min = float(row[12])
+            self.max = float(row[13])
+            self.bktsize = float(row[14])
+            self.size = int(row[15])
+
+        if self.type == 'FORMULA':
+            self.formula = self.db.allFormulas[self.stat]
+
+class Node(object):
+    def __init__(self, name):
+        self.name = name
+    def __str__(self):
+        return name
+
+class Database(object):
+    def __init__(self):
+        self.host = 'zizzer.pool'
+        self.user = ''
+        self.passwd = ''
+        self.db = 'm5stats'
+        self.cursor = None
+
+        self.allStats = []
+        self.allStatIds = {}
+        self.allStatNames = {}
+
+        self.allSubData = {}
+
+        self.allRuns = []
+        self.allRunIds = {}
+        self.allRunNames = {}
+
+        self.allBins = []
+        self.allBinIds = {}
+        self.allBinNames = {}
+
+        self.allFormulas = {}
+
+        self.stattop = {}
+        self.statdict = {}
+        self.statlist = []
+
+        self.mode = 'sum';
+        self.runs = None
+        self.bins = None
+        self.ticks = None
+        self.__dict__['get'] = type(self).sum
+
+    def query(self, sql):
+        self.cursor.execute(sql)
+
+    def update_dict(self, dict):
+        dict.update(self.stattop)
+
+    def append(self, stat):
+        statname = re.sub(':', '__', stat.name)
+        path = string.split(statname, '.')
+        pathtop = path[0]
+        fullname = ''
+
+        x = self
+        while len(path) > 1:
+            name = path.pop(0)
+            if not x.__dict__.has_key(name):
+                x.__dict__[name] = Node(fullname + name)
+            x = x.__dict__[name]
+            fullname = '%s%s.' % (fullname, name)
+
+        name = path.pop(0)
+        x.__dict__[name] = stat
+
+        self.stattop[pathtop] = self.__dict__[pathtop]
+        self.statdict[statname] = stat
+        self.statlist.append(statname)
+
+    def connect(self):
+        # connect
+        self.thedb = MySQLdb.connect(db=self.db,
+                                     host=self.host,
+                                     user=self.user,
+                                     passwd=self.passwd)
+
+        # create a cursor
+        self.cursor = self.thedb.cursor()
+
+        self.query('''select rn_id,rn_name,rn_sample,rn_user,rn_project
+                   from runs''')
+        for result in self.cursor.fetchall():
+            run = RunData(result);
+            self.allRuns.append(run)
+            self.allRunIds[run.run] = run
+            self.allRunNames[run.name] = run
+
+        self.query('select * from bins')
+        for id,name in self.cursor.fetchall():
+            self.allBinIds[int(id)] = name
+            self.allBinNames[name] = int(id)
+
+        self.query('select sd_stat,sd_x,sd_y,sd_name,sd_descr from subdata')
+        for result in self.cursor.fetchall():
+            subdata = SubData(result)
+            if self.allSubData.has_key(subdata.stat):
+                self.allSubData[subdata.stat].append(subdata)
+            else:
+                self.allSubData[subdata.stat] = [ subdata ]
+
+        self.query('select * from formulas')
+        for id,formula in self.cursor.fetchall():
+            self.allFormulas[int(id)] = formula
+
+        StatData.db = self
+        self.query('select * from stats')
+        import info
+        for result in self.cursor.fetchall():
+            stat = info.NewStat(StatData(result))
+            self.append(stat)
+            self.allStats.append(stat)
+            self.allStatIds[stat.stat] = stat
+            self.allStatNames[stat.name] = stat
+
+    # Name: listbins
+    # Desc: Prints all bins matching regex argument, if no argument
+    #       is given all bins are returned
+    def listBins(self, regex='.*'):
+        print '%-50s %-10s' % ('bin name', 'id')
+        print '-' * 61
+        names = self.allBinNames.keys()
+        names.sort()
+        for name in names:
+            id = self.allBinNames[name]
+            print '%-50s %-10d' % (name, id)
+
+    # Name: listruns
+    # Desc: Prints all runs matching a given user, if no argument
+    #       is given all runs are returned
+    def listRuns(self, user=None):
+        print '%-40s %-10s %-5s' % ('run name', 'user', 'id')
+        print '-' * 62
+        for run in self.allRuns:
+            if user == None or user == run.user:
+                print '%-40s %-10s %-10d' % (run.name, run.user, run.run)
+
+    # Name: listTicks
+    # Desc: Prints all samples for a given run
+    def listTicks(self, run=None):
+        print "tick"
+        print "----------------------------------------"
+        sql = 'select distinct dt_tick from data where dt_stat=1950'
+        #if run != None:
+        #    sql += ' where dt_run=%d' % run
+        self.query(sql)
+        for r in self.cursor.fetchall():
+            print r[0]
+
+    # Name: liststats
+    # Desc: Prints all statistics that appear in the database,
+    #         the optional argument is a regular expression that can
+    #         be used to prune the result set
+    def listStats(self, regex=None):
+        print '%-60s %-8s %-10s' % ('stat name', 'id', 'type')
+        print '-' * 80
+
+        rx = None
+        if regex != None:
+            rx = re.compile(regex)
+
+        stats = [ stat.name for stat in self.allStats ]
+        stats.sort(statcmp)
+        for stat in stats:
+            stat = self.allStatNames[stat]
+            if rx == None or rx.match(stat.name):
+                print '%-60s %-8s %-10s' % (stat.name, stat.stat, stat.type)
+
+    # Name: listFormulas
+    # Desc: Prints all formula statistics that appear in the database
+    #         together with their formulas; the optional argument is a
+    #         regular expression that can be used to prune the result set
+    def listFormulas(self, regex=None):
+        print '%-60s %s' % ('formula name', 'formula')
+        print '-' * 80
+
+        rx = None
+        if regex != None:
+            rx = re.compile(regex)
+
+        stats = [ stat.name for stat in self.allStats ]
+        stats.sort(statcmp)
+        for stat in stats:
+            stat = self.allStatNames[stat]
+            if stat.type == 'FORMULA' and (rx == None or rx.match(stat.name)):
+                print '%-60s %s' % (stat.name, self.allFormulas[stat.stat])
+
+    def getStat(self, stats):
+        if type(stats) is not list:
+            stats = [ stats ]
+
+        ret = []
+        for stat in stats:
+            if type(stat) is int:
+                ret.append(self.allStatIds[stat])
+
+            if type(stat) is str:
+                rx = re.compile(stat)
+                for stat in self.allStats:
+                    if rx.match(stat.name):
+                        ret.append(stat)
+        return ret
+
+    def getBin(self, bins):
+        if type(bins) is not list:
+            bins = [ bins ]
+
+        ret = []
+        for bin in bins:
+            if type(bin) is int:
+                ret.append(bin)
+            elif type(bin) is str:
+                ret.append(self.allBinNames[bin])
+            else:
+                for name,id in self.allBinNames.items():
+                    if bin.match(name):
+                        ret.append(id)
+
+        return ret
+
+    def getNotBin(self, bin):
+        map = {}
+        for bin in getBin(bin):
+            map[bin] = 1
+
+        ret = []
+        for bin in self.allBinIds.keys():
+            if not map.has_key(bin):
+                ret.append(bin)
+
+        return ret
+
+    #########################################
+    # get the data
+    #
+    def inner(self, op, stat, bins, ticks, group=False):
+        sql = 'select '
+        sql += 'dt_stat as stat, '
+        sql += 'dt_run as run, '
+        sql += 'dt_x as x, '
+        sql += 'dt_y as y, '
+        if group:
+            sql += 'dt_tick as tick, '
+        sql += '%s(dt_data) as data ' % op
+        sql += 'from data '
+        sql += 'where '
+
+        if isinstance(stat, list):
+            val = ' or '.join([ 'dt_stat=%d' % s.stat for s in stat ])
+            sql += ' (%s)' % val
+        else:
+            sql += ' dt_stat=%d' % stat.stat
+
+        if self.runs != None and len(self.runs):
+            val = ' or '.join([ 'dt_run=%d' % r for r in self.runs ])
+            sql += ' and (%s)' % val
+
+        if bins != None and len(bins):
+            val = ' or '.join([ 'dt_bin=%d' % b for b in bins ])
+            sql += ' and (%s)' % val
+
+        if ticks != None and len(ticks):
+            val = ' or '.join([ 'dt_tick=%d' % s for s in ticks ])
+            sql += ' and (%s)' % val
+
+        sql += ' group by dt_stat,dt_run,dt_x,dt_y'
+        if group:
+            sql += ',dt_tick'
+        return sql
+
+    def outer(self, op_out, op_in, stat, bins, ticks):
+        sql = self.inner(op_in, stat, bins, ticks, True)
+        sql = 'select stat,run,x,y,%s(data) from (%s) as tb ' % (op_out, sql)
+        sql += 'group by stat,run,x,y'
+        return sql
+
+    # Name: sum
+    # Desc: given a stat and arrays of bins and samples (ticks), build
+    #        the SQL query that sums the stat's data over the selected
+    #        bins and samples.  A bins argument must still be passed for
+    #        non-binned runs.
+    def sum(self, stat, bins, ticks):
+        return self.inner('sum', stat, bins, ticks)
+
+    # Name: avg
+    # Desc: given a stat and arrays of bins and samples (ticks), build
+    #        the SQL query that sums all the bins within each sample and
+    #        then averages the samples.  A bins argument must still be
+    #        passed for non-binned runs.
+    def avg(self, stat, bins, ticks):
+        return self.outer('avg', 'sum', stat, bins, ticks)
+
+    # Name: stdev
+    # Desc: given a stat and arrays of bins and samples (ticks), build
+    #        the SQL query that sums all the bins within each sample and
+    #        then takes the standard deviation across the samples.  A
+    #        bins argument must still be passed for non-binned runs.
+    def stdev(self, stat, bins, ticks):
+        return self.outer('stddev', 'sum', stat, bins, ticks)
+
+    def __getattribute__(self, attr):
+        if attr != 'get':
+            return super(Database, self).__getattribute__(attr)
+
+        if self.__dict__['get'] == type(self).sum:
+            return 'sum'
+        elif self.__dict__['get'] == type(self).avg:
+            return 'avg'
+        elif self.__dict__['get'] == type(self).stdev:
+            return 'stdev'
+        else:
+            return ''
+
+    def __setattr__(self, attr, value):
+        if attr != 'get':
+            super(Database, self).__setattr__(attr, value)
+            return
+
+        if value == 'sum':
+            self.__dict__['get'] = type(self).sum
+        elif value == 'avg':
+            self.__dict__['get'] = type(self).avg
+        elif value == 'stdev':
+            self.__dict__['get'] = type(self).stdev
+        else:
+            raise AttributeError, "can only set get to: sum | avg | stdev"
+
+    def data(self, stat, bins=None, ticks=None):
+        if bins is None:
+            bins = self.bins
+        if ticks is None:
+            ticks = self.ticks
+        sql = self.__dict__['get'](self, stat, bins, ticks)
+        self.query(sql)
+
+        runs = {}
+        for x in self.cursor.fetchall():
+            data = Data(x)
+            if not runs.has_key(data.run):
+                runs[data.run] = {}
+            if not runs[data.run].has_key(data.x):
+                runs[data.run][data.x] = {}
+
+            runs[data.run][data.x][data.y] = data.data
+        return runs
diff --git a/util/stats/dbinit.py b/util/stats/dbinit.py

new file mode 100644 (file)

index 0000000..686f55c
--- /dev/null
+++ b/util/stats/dbinit.py
@@ -0,0 +1,388 @@
+import MySQLdb
+
+class MyDB(object):
+    def __init__(self, options):
+        self.name = options.db
+        self.host = options.host
+        self.user = options.user
+        self.passwd = options.passwd
+        self.mydb = None
+        self.cursor = None
+
+    def admin(self):
+        self.close()
+        self.mydb = MySQLdb.connect(db='mysql', host=self.host, user=self.user,
+                                    passwd=self.passwd)
+        self.cursor = self.mydb.cursor()
+
+    def connect(self):
+        self.close()
+        self.mydb = MySQLdb.connect(db=self.name, host=self.host,
+                                    user=self.user, passwd=self.passwd)
+        self.cursor = self.mydb.cursor()
+
+    def close(self):
+        if self.mydb is not None:
+            self.mydb.close()
+        self.cursor = None
+
+    def query(self, sql):
+        self.cursor.execute(sql)
+
+    def drop(self):
+        self.query('DROP DATABASE IF EXISTS %s' % self.name)
+
+    def create(self):
+        self.query('CREATE DATABASE %s' % self.name)
+
+    def populate(self):
+        """Create all of the statistics tables on the current connection.
+
+        Issues one CREATE TABLE statement each for: runs, bins, stats,
+        data, subdata, formulas, formula_ref, events and event_names.
+        """
+        # NOTE(review): every table uses the TYPE=InnoDB table option;
+        # newer MySQL servers presumably require ENGINE=InnoDB -- confirm
+        # against the server version in use.
+        #
+        # Each run (or simulation) gets its own entry in the runs table to
+        # group stats by where they were generated
+        #
+        # COLUMNS:
+        #   'id' is a unique identifier for each run to be used in other
+        #       tables.
+        #   'name' is the user designated name for the data generated.  It is
+        #       configured in the simulator.
+        #   'sample' is stored with the name; presumably it identifies a
+        #       sample within the run (TODO confirm).
+        #   'user' identifies the user that generated the data for the given
+        #       run.
+        #   'project' another name to identify runs for a specific goal
+        #   'date' is a timestamp for when the data was generated.  It can be
+        #       used to easily expire data that was generated in the past.
+        #   'expire' is a timestamp for when the data should be removed from
+        #       the database so we don't have years worth of junk.
+        #
+        # INDEXES:
+        #   'id' is the primary key so you can find out details of a run if
+        #       the run was retrieved from the data table.
+        #   'name,sample' is unique so that each run name/sample pair is
+        #       forced to be unique
+        #
+        self.query('''
+        CREATE TABLE runs(
+            rn_id      SMALLINT UNSIGNED       NOT NULL AUTO_INCREMENT,
+            rn_name    VARCHAR(200)            NOT NULL,
+            rn_sample  VARCHAR(32)             NOT NULL,
+            rn_user    VARCHAR(32)             NOT NULL,
+            rn_project VARCHAR(100)            NOT NULL,
+            rn_date    TIMESTAMP               NOT NULL,
+            rn_expire  TIMESTAMP               NOT NULL,
+            PRIMARY KEY (rn_id),
+            UNIQUE (rn_name,rn_sample)
+        ) TYPE=InnoDB''')
+
+        #
+        # We keep the bin names separate so that the data table doesn't get
+        # huge since bin names are frequently repeated.
+        #
+        # COLUMNS:
+        #   'id' is the unique bin identifier.
+        #   'name' is the string name for the bin.
+        #
+        # INDEXES:
+        #   'id' is indexed to get the name of a bin when data is retrieved
+        #       via the data table.
+        #   'name' is indexed to get the bin id for a named bin when you want
+        #       to search the data table based on a specific bin.
+        #
+        self.query('''
+        CREATE TABLE bins(
+            bn_id      SMALLINT UNSIGNED       NOT NULL AUTO_INCREMENT,
+            bn_name    VARCHAR(255)            NOT NULL,
+            PRIMARY KEY(bn_id),
+            UNIQUE (bn_name)
+        ) TYPE=InnoDB''')
+
+        #
+        # The stat table gives us all of the data for a particular stat.
+        #
+        # COLUMNS:
+        #   'id' is a unique identifier for each stat to be used in other
+        #       tables for references.
+        #   'name' is simply the simulator derived name for a given
+        #       statistic.
+        #   'descr' is the description of the statistic and what it tells
+        #       you.
+        #   'type' defines what the stat tells you.  Types are:
+        #       SCALAR: A simple scalar statistic that holds one value
+        #       VECTOR: An array of statistic values.  Such as something that
+        #           is generated per-thread.  Vectors exist to give averages,
+        #           pdfs, cdfs, means, standard deviations, etc across the
+        #           stat values.
+        #       DIST: Is a distribution of data.  When the statistic value is
+        #           sampled, its value is counted in a particular bucket.
+        #           Useful for keeping track of utilization of a resource.
+        #           (e.g. fraction of time it is 25% used vs. 50% vs. 100%)
+        #       VECTORDIST: Can be used when the distribution needs to be
+        #           factored out into a per-thread distribution of data for
+        #           example.  It can still be summed across threads to find
+        #           the total distribution.
+        #       VECTOR2D: Can be used when you have a stat that is not only
+        #           per-thread, but it is per-something else.  Like
+        #           per-message type.
+        #       FORMULA: This statistic is a formula, and its data must be
+        #           looked up in the formula table, for indicating how to
+        #           present its values.
+        #   'subdata' is potentially used by any of the vector types to
+        #       give a specific name to all of the data elements within a
+        #       stat.  (Stored in the separate subdata table below.)
+        #   'print' indicates whether this stat should be printed ever.
+        #       (Unnamed stats don't usually get printed)
+        #   'prereq' only print the stat if the prereq is not zero.
+        #   'prec' number of decimal places to print
+        #   'nozero' don't print zero values
+        #   'nonan' don't print NaN values
+        #   'total' for vector type stats, print the total.
+        #   'pdf' for vector type stats, print the pdf.
+        #   'cdf' for vector type stats, print the cdf.
+        #
+        #   The Following are for dist type stats:
+        #   'min' is the minimum bucket value. Anything less is an underflow.
+        #   'max' is the maximum bucket value. Anything more is an overflow.
+        #   'bktsize' is the approximate number of entries in each bucket.
+        #   'size' is the number of buckets. equal to (min/max)/bktsize.
+        #       NOTE(review): presumably this should read
+        #       (max - min)/bktsize -- confirm.
+        #
+        # INDEXES:
+        #   'id' is indexed so that you can find out details about a stat
+        #       if the stat id was retrieved from the data table.
+        #   'name' is indexed so that you can simply look up data about a
+        #       named stat.
+        #
+        self.query('''
+        CREATE TABLE stats(
+            st_id      SMALLINT UNSIGNED       NOT NULL AUTO_INCREMENT,
+            st_name    VARCHAR(255)            NOT NULL,
+            st_descr   TEXT                    NOT NULL,
+            st_type    ENUM("SCALAR", "VECTOR", "DIST", "VECTORDIST",
+                "VECTOR2D", "FORMULA") NOT NULL,
+            st_print   BOOL                    NOT NULL,
+            st_prereq  SMALLINT UNSIGNED       NOT NULL,
+            st_prec    TINYINT                 NOT NULL,
+            st_nozero  BOOL                    NOT NULL,
+            st_nonan   BOOL                    NOT NULL,
+            st_total   BOOL                    NOT NULL,
+            st_pdf     BOOL                    NOT NULL,
+            st_cdf     BOOL                    NOT NULL,
+            st_min     DOUBLE                  NOT NULL,
+            st_max     DOUBLE                  NOT NULL,
+            st_bktsize DOUBLE                  NOT NULL,
+            st_size    SMALLINT UNSIGNED       NOT NULL,
+            PRIMARY KEY (st_id),
+            UNIQUE (st_name)
+        ) TYPE=InnoDB''')
+
+        #
+        # This is the main table of data for stats.
+        #
+        # COLUMNS:
+        #   'stat' refers to the stat field given in the stat table.
+        #
+        #   'x' refers to the first dimension of a multi-dimensional stat. For
+        #       a vector, x will start at 0 and increase for each vector
+        #       element.
+        #       For a distribution:
+        #       -1: sum (for calculating standard deviation)
+        #       -2: sum of squares (for calculating standard deviation)
+        #       -3: total number of samples taken (for calculating
+        #           standard deviation)
+        #       -4: minimum value
+        #       -5: maximum value
+        #       -6: underflow
+        #       -7: overflow
+        #   'y' is used by a VECTORDIST and the VECTOR2D to describe the second
+        #       dimension.
+        #   'run' is the run that the data was generated from.  Details are in
+        #       the run table
+        #   'tick' is a timestamp generated by the simulator.
+        #   'bin' is the id of the bin that the data was generated in, if
+        #       any.
+        #   'data' is the actual stat value.
+        #
+        # INDEXES:
+        #   'stat' is indexed so that a user can find all of the data for a
+        #       particular stat. It is not unique, because that specific stat
+        #       can be found in many runs, bins, and samples, in addition to
+        #       having entries for the multidimensional cases.
+        #   'run' is indexed to allow a user to remove all of the data for a
+        #       particular execution run.  It can also be used to allow the
+        #       user to print out all of the data for a given run.
+        #
+        self.query('''
+        CREATE TABLE data(
+            dt_stat    SMALLINT UNSIGNED       NOT NULL,
+            dt_x       SMALLINT                NOT NULL,
+            dt_y       SMALLINT                NOT NULL,
+            dt_run     SMALLINT UNSIGNED       NOT NULL,
+            dt_tick    BIGINT UNSIGNED         NOT NULL,
+            dt_bin     SMALLINT UNSIGNED       NOT NULL,
+            dt_data    DOUBLE                  NOT NULL,
+            INDEX (dt_stat),
+            INDEX (dt_run),
+            UNIQUE (dt_stat,dt_x,dt_y,dt_run,dt_tick,dt_bin)
+        ) TYPE=InnoDB;''')
+
+        #
+        # Names and descriptions for multi-dimensional stats (vectors, etc.)
+        # are stored here instead of having their own entry in the statistics
+        # table. This allows all parts of a single stat to easily share a
+        # single id.
+        #
+        # COLUMNS:
+        #   'stat' is the unique stat identifier from the stat table.
+        #   'x' is the first dimension for multi-dimensional stats
+        #       corresponding to the data table above.
+        #   'y' is the second dimension for multi-dimensional stats
+        #       corresponding to the data table above.
+        #   'name' is the specific subname for the unique stat,x,y combination.
+        #   'descr' is the specific description for the unique stat,x,y
+        #        combination.
+        #
+        # INDEXES:
+        #   'stat,x,y' is unique so you can get the subdata for a specific
+        #       stat.
+        #
+        self.query('''
+        CREATE TABLE subdata(
+            sd_stat    SMALLINT UNSIGNED       NOT NULL,
+            sd_x       SMALLINT                NOT NULL,
+            sd_y       SMALLINT                NOT NULL,
+            sd_name    VARCHAR(255)            NOT NULL,
+            sd_descr   TEXT,
+            UNIQUE (sd_stat,sd_x,sd_y)
+        ) TYPE=InnoDB''')
+
+
+        #
+        # The formula table is maintained separately from the data table
+        # because formula data, unlike other stat data cannot be represented
+        # there.
+        #
+        # COLUMNS:
+        #   'stat' refers to the stat field generated in the stat table.
+        #   'formula' is the actual string representation of the formula
+        #       itself.
+        #
+        # INDEXES:
+        #   'stat' is indexed so that you can just look up a formula.
+        #
+        self.query('''
+        CREATE TABLE formulas(
+            fm_stat    SMALLINT UNSIGNED       NOT NULL,
+            fm_formula BLOB                    NOT NULL,
+            PRIMARY KEY(fm_stat)
+        ) TYPE=InnoDB''')
+
+        #
+        # Each stat used in each formula is kept in this table.  This way, if
+        # you want to print out a particular formula, you can simply find out
+        # which stats you need by looking in this table.  Additionally, when
+        # you remove a stat from the stats table and data table, you remove
+        # any references to the formula in this table.  When a formula is no
+        # longer referred to, you remove its entry.
+        #
+        # NOTE(review): the description above and the 'child' column below
+        # do not match the table as created, which holds (fr_stat, fr_run)
+        # pairs; it looks like it records which runs reference each
+        # formula -- confirm and update.
+        #
+        # COLUMNS:
+        #   'stat' is the stat id from the stat table above.
+        #   'child' is the stat id of a stat that is used for this formula.
+        #       There may be many children for any given 'stat' (formula)
+        #
+        # INDEXES:
+        #   'stat' is indexed so you can look up all of the children for a
+        #       particular stat.
+        #   'child' is indexed so that you can remove an entry when a stat is
+        #       removed.
+        #
+        self.query('''
+        CREATE TABLE formula_ref(
+            fr_stat    SMALLINT UNSIGNED       NOT NULL,
+            fr_run     SMALLINT UNSIGNED       NOT NULL,
+            UNIQUE (fr_stat,fr_run),
+            INDEX (fr_stat),
+            INDEX (fr_run)
+        ) TYPE=InnoDB''')
+
+        # COLUMNS:
+        #   'event' is the unique event id from the event_names table
+        #   'run' is simulation run id that this event took place in
+        #   'tick' is the tick when the event happened
+        #
+        # INDEXES:
+        #   'event' is indexed so you can look up all occurrences of a
+        #       specific event
+        #   'run' is indexed so you can find all events in a run
+        #   'tick' is indexed because we want the unique thing anyway
+        #   'event,run,tick' is unique combination
+        self.query('''
+        CREATE TABLE events(
+            ev_event   SMALLINT UNSIGNED       NOT NULL,
+            ev_run     SMALLINT UNSIGNED       NOT NULL,
+            ev_tick    BIGINT   UNSIGNED       NOT NULL,
+            INDEX(ev_event),
+            INDEX(ev_run),
+            INDEX(ev_tick),
+            UNIQUE(ev_event,ev_run,ev_tick)
+        ) TYPE=InnoDB''')
+
+        # COLUMNS:
+        #   'id' is the unique description id
+        #   'name' is the name of the event that occurred
+        #
+        # INDEXES:
+        #   'id' is indexed because it is the primary key and is what you use
+        #       to look up the descriptions
+        #   'name' is indexed so one can find the event based on name
+        #
+        self.query('''
+        CREATE TABLE event_names(
+            en_id      SMALLINT UNSIGNED       NOT NULL AUTO_INCREMENT,
+            en_name    VARCHAR(255)            NOT NULL,
+            PRIMARY KEY (en_id),
+            UNIQUE (en_name)
+        ) TYPE=InnoDB''')
+
+    def clean(self):
+        self.query('''
+        DELETE data
+        FROM data
+        LEFT JOIN runs ON dt_run=rn_id
+        WHERE rn_id IS NULL''')
+
+        self.query('''
+        DELETE formula_ref
+        FROM formula_ref
+        LEFT JOIN runs ON fr_run=rn_id
+        WHERE rn_id IS NULL''')
+
+        self.query('''
+        DELETE formulas
+        FROM formulas
+        LEFT JOIN formula_ref ON fm_stat=fr_stat
+        WHERE fr_stat IS NULL''')
+
+        self.query('''
+        DELETE stats
+        FROM stats
+        LEFT JOIN data ON st_id=dt_stat
+        WHERE dt_stat IS NULL''')
+
+        self.query('''
+        DELETE subdata
+        FROM subdata
+        LEFT JOIN data ON sd_stat=dt_stat
+        WHERE dt_stat IS NULL''')
+
+        self.query('''
+        DELETE bins
+        FROM bins
+        LEFT JOIN data ON bn_id=dt_bin
+        WHERE dt_bin IS NULL''')
+
+        self.query('''
+        DELETE events
+        FROM events
+        LEFT JOIN runs ON ev_run=rn_id
+        WHERE rn_id IS NULL''')
+
+        self.query('''
+        DELETE event_names
+        FROM event_names
+        LEFT JOIN events ON en_id=ev_event
+        WHERE ev_event IS NULL''')
diff --git a/util/stats/display.py b/util/stats/display.py

new file mode 100644 (file)

index 0000000..68a2685
--- /dev/null
+++ b/util/stats/display.py
@@ -0,0 +1,124 @@
+class Value:
+    '''Formats one statistic value for printing.
+
+    value is a number or a string such as 'nan' or 'inf'.  precision is
+    the number of decimals; a negative precision picks "%.0f" for whole
+    numbers and "%f" otherwise.  percent multiplies numbers by 100 and
+    appends '%' to the result.
+    '''
+    def __init__(self, value, precision, percent = False):
+        self.value = value
+        self.precision = precision
+        self.percent = percent
+
+    def __str__(self):
+        if isinstance(self.value, str):
+            # Normalize the spelling of the special values.  Any other
+            # string is passed through unchanged; it used to leave the
+            # local unbound and raise UnboundLocalError.
+            special = { 'nan' : 'NaN', 'inf' : 'Inf' }
+            value = special.get(self.value.lower(), self.value)
+        else:
+            if self.precision >= 0:
+                fmt = "%%.%df" % self.precision
+            elif self.value == 0.0 or self.value % 1.0 == 0.0:
+                fmt = "%.0f"
+            else:
+                fmt = "%f"
+            value = self.value
+            if self.percent:
+                value = value * 100.0
+            value = fmt % value
+
+        if self.percent:
+            value = value + "%"
+
+        return value
+
+class Print:
+    '''One line of statistics output.
+
+    The attributes read here (name, value, precision, flags and the
+    optional desc, pdf and cdf) are passed as keyword arguments or
+    assigned by the caller afterwards.
+    '''
+    def __init__(self, **vals):
+        self.__dict__.update(vals)
+
+    def __str__(self):
+        attrs = self.__dict__
+        value = Value(self.value, self.precision)
+
+        # pdf/cdf columns stay empty unless the caller supplied them
+        pdf = ''
+        if 'pdf' in attrs:
+            pdf = Value(self.pdf, 2, True)
+        cdf = ''
+        if 'cdf' in attrs:
+            cdf = Value(self.cdf, 2, True)
+
+        output = "%-40s %12s %8s %8s" % (self.name, value, pdf, cdf)
+
+        # NOTE(review): 'descriptions' is not defined in this module --
+        # confirm where it is expected to come from.
+        if descriptions and 'desc' in attrs and self.desc:
+            output = "%s # %s" % (output, self.desc)
+
+        return output
+
+    def doprint(self):
+        '''Return False when the flags ask for this value to be hidden.'''
+        # NOTE(review): display_all, flags_nozero and flags_nonan are not
+        # defined in this module either -- confirm the intended names.
+        if display_all:
+            return True
+        if self.value == 0.0 and (self.flags & flags_nozero):
+            return False
+        hidden_nan = isinstance(self.value, str) and \
+                     self.value == 'NaN' and (self.flags & flags_nonan)
+        return not hidden_nan
+
+    def display(self):
+        if not self.doprint():
+            return
+        print self
+
+class VectorDisplay:
+    '''Prints a single value or a vector of values through Print.
+
+    The caller assigns name, desc, value, flags and precision (and
+    optionally subnames and subdescs) before calling display().
+    '''
+    def display(self):
+        # NOTE(review): issequence, display_all, flags_pdf and flags_total
+        # are not defined in this module -- confirm where they come from.
+        p = Print(flags = self.flags, precision = self.precision)
+
+        if not issequence(self.value):
+            # plain value: a single line
+            p.name = self.name
+            p.desc = self.desc
+            p.value = self.value
+            p.display()
+            return
+
+        value = self.value
+        count = len(value)
+        if not count:
+            return
+
+        mytotal = reduce(lambda x,y: float(x) + float(y), value)
+        mycdf = 0.0
+
+        # Elements whose sub-name stays empty are skipped by the loop
+        # below, so unnamed elements are only printed under display_all.
+        if display_all:
+            subnames = [ '[%d]' % i for i in range(count) ]
+        else:
+            subnames = [''] * count
+
+        if 'subnames' in self.__dict__:
+            for i,each in enumerate(self.subnames):
+                if len(each) > 0:
+                    subnames[i] = '.%s' % each
+
+        # every element starts with the vector's description; the given
+        # sub-descriptions replace the leading ones
+        subdescs = [self.desc] * count
+        if 'subdescs' in self.__dict__:
+            n = min(count, len(self.subdescs))
+            subdescs[:n] = list(self.subdescs[:n])
+
+        for val,sname,sdesc in zip(value, subnames, subdescs):
+            # the cdf accumulates over skipped elements as well
+            if mytotal > 0.0:
+                mypdf = float(val) / float(mytotal)
+                mycdf += mypdf
+                if (self.flags & flags_pdf):
+                    p.pdf = mypdf
+                    p.cdf = mycdf
+
+            if len(sname) > 0:
+                p.name = self.name + sname
+                p.desc = sdesc
+                p.value = val
+                p.display()
+
+        if (self.flags & flags_total):
+            # the total line carries no pdf/cdf columns
+            for key in ('pdf', 'cdf'):
+                if key in p.__dict__:
+                    del p.__dict__[key]
+            p.name = self.name + '.total'
+            p.desc = self.desc
+            p.value = mytotal
+            p.display()
+
+
diff --git a/util/stats/flags.py b/util/stats/flags.py

new file mode 100644 (file)

index 0000000..7a57e72
--- /dev/null
+++ b/util/stats/flags.py
@@ -0,0 +1,36 @@
+# Copyright (c) 2004 The Regents of The University of Michigan
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# Authors: Nathan Binkert
+
+# Bit masks tested against a statistic's 'flags' word with '&'.
+# NOTE(review): the numeric values presumably have to match whatever
+# produced the flags -- confirm before renumbering.  Bits 2 and 3 are not
+# assigned here.
+init      = 1 << 0
+printable = 1 << 1
+total     = 1 << 4
+pdf       = 1 << 5
+cdf       = 1 << 6
+dist      = 1 << 7
+nozero    = 1 << 8
+nonan     = 1 << 9
diff --git a/util/stats/info.py b/util/stats/info.py

new file mode 100644 (file)

index 0000000..a94563c
--- /dev/null
+++ b/util/stats/info.py
@@ -0,0 +1,724 @@
+from __future__ import division
+import operator, re, types
+
+# Object the statistics read their values from: the classes below call
+# source.data() and use source.stattop.  It starts out unset; presumably
+# the driver script assigns it -- TODO confirm.
+source = None
+# Key of the run that FormulaStat.__float__() and FormulaStat.display()
+# pick out of a value.
+display_run = 0
+
+def issequence(t):
+    '''Return True when t is a tuple or a list.'''
+    return isinstance(t, (types.TupleType, types.ListType))
+
+def total(f):
+    '''Return a new FormulaStat whose value is the sum of f.
+
+    f may be a FormulaStat (its value is used) or a raw value.  A tuple
+    or list is added up with operator.add; anything else is stored as is.
+    '''
+    if isinstance(f, FormulaStat):
+        f = f.value
+
+    result = FormulaStat()
+    if issequence(f):
+        result.value = reduce(operator.add, f)
+    else:
+        result.value = f
+    return result
+
+def unaryop(op, f):
+    '''Apply op to f, or to f.value when f is a FormulaStat.
+
+    A tuple or list operand is mapped element by element; anything else
+    is handed to op directly.
+    '''
+    operand = f
+    if isinstance(f, FormulaStat):
+        operand = f.value
+
+    if not issequence(operand):
+        return op(operand)
+    return map(op, operand)
+
+def zerodiv(lv, rv):
+    '''True division that yields 0.0 instead of failing on a zero
+    divisor.'''
+    if rv == 0.0:
+        return 0.0
+    return operator.truediv(lv, rv)
+
+def wrapop(op, lv, rv):
+    '''Return op(lv, rv), unless an operand is a string.
+
+    A string operand is returned unchanged instead of being passed to
+    op; the left operand is checked first.
+    '''
+    for operand in (lv, rv):
+        if isinstance(operand, str):
+            return operand
+    return op(lv, rv)
+
+def _samekeys(what, lmap, rmap):
+    '''Return True when lmap and rmap have exactly the same keys;
+    otherwise report the mismatch on stdout and return False.'''
+    if len(lmap) == len(rmap):
+        for key in lmap.keys():
+            if key not in rmap:
+                break
+        else:
+            return True
+
+    print('%s keys differ: %s != %s' % (what, lmap.keys(), rmap.keys()))
+    return False
+
+def same(lv, rv):
+    '''Return True when lv and rv have identical run, x and y keys.
+
+    Both arguments are nested dictionaries indexed [run][x][y].  The keys
+    are compared as sets at every level.  The earlier pairwise zip() over
+    the key lists ignored extra keys on either side and depended on the
+    iteration order of the dictionaries.
+    '''
+    if not _samekeys('run', lv, rv):
+        return False
+    for run in lv.keys():
+        if not _samekeys('x', lv[run], rv[run]):
+            return False
+        for x in lv[run].keys():
+            if not _samekeys('y', lv[run][x], rv[run][x]):
+                return False
+    return True
+
+
+
+def binaryop(op, lf, rf):
+    '''Combine lf and rf element by element and return the result as a
+    nested dictionary indexed [run][x][y].
+
+    When both operands are FormulaStats their values must have identical
+    keys (AttributeError otherwise).  When only one is, the other operand
+    is combined with each of its elements.  When neither is, the result
+    is an empty dictionary.
+    '''
+    result = {}
+    lstat = isinstance(lf, FormulaStat)
+
+    if lstat and isinstance(rf, FormulaStat):
+        lv = lf.value
+        rv = rf.value
+        if not same(lv, rv):
+            raise AttributeError("run,x,y not identical")
+        shape = lv
+        def combine(run, x, y):
+            return wrapop(op, lv[run][x][y], rv[run][x][y])
+    elif lstat:
+        lv = lf.value
+        shape = lv
+        def combine(run, x, y):
+            return wrapop(op, lv[run][x][y], rf)
+    elif isinstance(rf, FormulaStat):
+        rv = rf.value
+        shape = rv
+        def combine(run, x, y):
+            return wrapop(op, lf, rv[run][x][y])
+    else:
+        return result
+
+    # walk the keys of the FormulaStat operand (the left one when both
+    # are) and fill in the result in the same layout
+    for run in shape.keys():
+        result[run] = {}
+        for x in shape[run].keys():
+            result[run][x] = {}
+            for y in shape[run][x].keys():
+                result[run][x][y] = combine(run, x, y)
+
+    return result
+
+def sums(x, y):
+    '''Add x and y; when x is a tuple or list the two are added element
+    by element.'''
+    if not issequence(x):
+        return x + y
+    return map(operator.add, x, y)
+
+def alltrue(list):
+    '''Return a true value when every element of list is true.
+
+    As before, the result is the last element (or the first false one)
+    rather than a strict boolean.  An empty list now yields True instead
+    of making reduce() raise TypeError.
+    '''
+    return reduce(lambda x, y: x and y, list, True)
+
+def allfalse(list):
+    '''Return True when no element of list is true.
+
+    An empty list now yields True instead of making reduce() raise
+    TypeError.
+    '''
+    return not reduce(lambda x, y: x or y, list, False)
+
+def enumerate(list):
+    '''Return the (index, element) pairs of list.
+
+    NOTE(review): this replaces the builtin of the same name inside this
+    module -- presumably for interpreters that lack it; confirm.
+    '''
+    return zip(range(len(list)), list)
+
+def cmp(a, b):
+    '''Three-way comparison built on '<' and '==': -1 when a < b, 0 when
+    a == b and 1 otherwise.  Replaces the builtin cmp inside this
+    module.'''
+    if a < b:
+        return -1
+    if a == b:
+        return 0
+    return 1
+
+class Statistic(object):
+    '''Base class of the statistics; caches a lazily computed 'value'.
+
+    The attributes of the data object passed in are copied onto the
+    instance.  'value' is computed by getValue() the first time it is
+    read and cached until 'bins' or 'ticks' is assigned.
+    '''
+    def __init__(self, data):
+        self.__dict__.update(data.__dict__)
+        # make sure the attributes handled specially below always exist
+        for attr in ('value', 'bins', 'ticks'):
+            self.__dict__.setdefault(attr, None)
+
+    def __getattribute__(self, attr):
+        if attr != 'value':
+            return super(Statistic, self).__getattribute__(attr)
+
+        # Identity test: None means "not computed yet".  Using '==' here
+        # would call the cached value's own comparison instead.
+        if self.__dict__['value'] is None:
+            self.__dict__['value'] = self.getValue()
+        return self.__dict__['value']
+
+    def __setattr__(self, attr, value):
+        if attr == 'bins' or attr == 'ticks':
+            if attr == 'bins':
+                # NOTE(review): no 'db' global is defined in the visible
+                # part of this module -- confirm where it is set.
+                global db
+                if value is not None:
+                    value = db.getBin(value)
+            # NOTE(review): this branch can never run, because attr is
+            # 'bins' or 'ticks' here.  Confirm whether 'samples' (or
+            # 'ticks') was meant to be parsed.
+            elif attr == 'samples' and type(value) is str:
+                value = [ int(x) for x in value.split() ]
+
+            self.__dict__[attr] = value
+            # the cached value depends on bins/ticks; drop it
+            self.__dict__['value'] = None
+        else:
+            super(Statistic, self).__setattr__(attr, value)
+
+    def getValue(self):
+        '''Compute the value; subclasses must override this.'''
+        raise AttributeError('getValue() must be defined')
+
+    def zero(self):
+        return False
+
+    def __ne__(self, other):
+        return not (self == other)
+
+    def __str__(self):
+        return '%f' % (float(self))
+
+class FormulaStat(object):
+    '''Arithmetic on values laid out as nested dictionaries indexed
+    [run][x][y].  Every operator returns a new FormulaStat.'''
+
+    def _combine(self, op, lhs, rhs):
+        '''Return a new FormulaStat holding binaryop(op, lhs, rhs).'''
+        result = FormulaStat()
+        result.value = binaryop(op, lhs, rhs)
+        return result
+
+    # self on the left
+    def __add__(self, other):
+        return self._combine(operator.add, self, other)
+    def __sub__(self, other):
+        return self._combine(operator.sub, self, other)
+    def __mul__(self, other):
+        return self._combine(operator.mul, self, other)
+    def __truediv__(self, other):
+        # division by zero yields 0.0, see zerodiv()
+        return self._combine(zerodiv, self, other)
+    def __mod__(self, other):
+        return self._combine(operator.mod, self, other)
+
+    # self on the right
+    def __radd__(self, other):
+        return self._combine(operator.add, other, self)
+    def __rsub__(self, other):
+        return self._combine(operator.sub, other, self)
+    def __rmul__(self, other):
+        return self._combine(operator.mul, other, self)
+    def __rtruediv__(self, other):
+        return self._combine(zerodiv, other, self)
+    def __rmod__(self, other):
+        return self._combine(operator.mod, other, self)
+
+    def __neg__(self):
+        result = FormulaStat()
+        result.value = unaryop(operator.neg, self)
+        return result
+
+    def __getitem__(self, idx):
+        '''Return a FormulaStat that holds, for every run, element
+        [idx][0] of this one at position [0][0].'''
+        result = FormulaStat()
+        result.value = {}
+        for run in self.value.keys():
+            result.value[run] = { 0 : { 0 : self.value[run][idx][0] } }
+        return result
+
+    def __float__(self):
+        if isinstance(self.value, FormulaStat):
+            return float(self.value)
+        if display_run not in self.value:
+            # no data for the displayed run: 1e300*1e300 overflows to inf
+            return (1e300*1e300)
+
+        rundata = self.value[display_run]
+        if len(rundata) == 1:
+            return rundata[0][0]
+        # NOTE(review): the index 4 is hard coded and the ValueError that
+        # used to follow is commented out -- confirm the intent.
+        #print self.value[display_run]
+        return rundata[4][0]
+        #raise ValueError
+
+    def display(self):
+        import display
+        d = display.VectorDisplay()
+        d.flags = 0
+        d.precision = 1
+        d.name = 'formula'
+        d.desc = 'formula'
+        rundata = self.value[display_run]
+        d.value = [ rundata[x][0] for x in rundata.keys() ]
+        d.display()
+
+
+
+class Scalar(Statistic,FormulaStat):
+    '''Statistic whose value is fetched from the data source and printed
+    as a single line.'''
+    def getValue(self):
+        return source.data(self, self.bins)
+
+    def display(self):
+        import display
+        # the line is built (and the value fetched) before the flags are
+        # looked at
+        p = display.Print(name = self.name, desc = self.desc)
+        p.value = float(self)
+        p.flags = self.flags
+        p.precision = self.precision
+        # NOTE(review): 'flags' is not imported in the visible part of
+        # this module -- confirm.
+        if not (display.all or (self.flags & flags.printable)):
+            return
+        p.display()
+
+    def comparable(self, other):
+        return other.name == self.name
+
+    def __eq__(self, other):
+        return self.value == other.value
+
+    def __isub__(self, other):
+        self.value -= other.value
+        return self
+
+    def __iadd__(self, other):
+        self.value += other.value
+        return self
+
+    def __itruediv__(self, other):
+        # a zero (or otherwise false) divisor leaves the value untouched
+        if other:
+            self.value /= other
+        return self
+
+class Vector(Statistic,FormulaStat):
+    '''Statistic whose value is fetched from the data source and printed
+    through display.VectorDisplay.'''
+    def getValue(self):
+        return source.data(self, self.bins)
+
+    def display(self):
+        import display
+        # NOTE(review): 'flags' is not imported in the visible part of
+        # this module -- confirm.
+        if not display.all and not (self.flags & flags.printable):
+            return
+
+        d = display.VectorDisplay()
+        d.__dict__.update(self.__dict__)
+        d.display()
+
+    def comparable(self, other):
+        return self.name == other.name and \
+               len(self.value) == len(other.value)
+
+    def __eq__(self, other):
+        selfseq = issequence(self.value)
+        if selfseq != issequence(other.value):
+            # was 'return false', which raised NameError
+            return False
+
+        if not selfseq:
+            return self.value == other.value
+
+        if len(self.value) != len(other.value):
+            return False
+        for v1,v2 in zip(self.value, other.value):
+            if v1 != v2:
+                return False
+        return True
+
+    # NOTE(review): binaryop() only combines FormulaStat operands.  The
+    # two methods below pass the raw values, for which it returns an
+    # empty dictionary unless the values are FormulaStats -- confirm
+    # whether self and other were meant to be passed.
+    def __isub__(self, other):
+        self.value = binaryop(operator.sub, self.value, other.value)
+        return self
+
+    def __iadd__(self, other):
+        self.value = binaryop(operator.add, self.value, other.value)
+        return self
+
+    def __itruediv__(self, other):
+        # a zero (or otherwise false) divisor leaves the value untouched
+        if not other:
+            return self
+        if issequence(self.value):
+            for i in xrange(len(self.value)):
+                self.value[i] /= other
+        else:
+            self.value /= other
+        return self
+
+class Formula(Vector):
+    '''Vector whose value is computed by evaluating its formula text.'''
+    def getValue(self):
+        # every ':' in the formula becomes '__' before evaluation
+        expression = self.formula.replace(':', '__')
+        # SECURITY NOTE(review): eval() executes whatever the formula
+        # text contains; it must only ever come from a trusted source.
+        result = eval(expression, source.stattop)
+        return result.value
+
+    def comparable(self, other):
+        # NOTE(review): 'compare' is not defined in the visible part of
+        # this module -- confirm.
+        return self.name == other.name and \
+               compare(self.dist, other.dist)
+
+    def __eq__(self, other):
+        return self.value == other.value
+
+    # The in-place operators below leave a formula unchanged.
+    def __isub__(self, other):
+        return self
+
+    def __iadd__(self, other):
+        return self
+
+    def __itruediv__(self, other):
+        if not other:
+            return self
+        return self
+
+class SimpleDist(object):
+    '''Sum, sum of squares and number of samples of a distribution.'''
+    def __init__(self, sums, squares, samples):
+        self.sums = sums
+        self.squares = squares
+        self.samples = samples
+
+    def getValue(self):
+        return 0.0
+
+    def display(self, name, desc, flags, precision):
+        '''Print <name>.mean and <name>.stdev (only when there are
+        samples), followed by <name>.samples.  desc is accepted but not
+        used.'''
+        # math is not imported at the top of this module; without this
+        # import the sqrt() below raised NameError
+        import math
+        import display
+        p = display.Print()
+        p.flags = flags
+        p.precision = precision
+
+        if self.samples > 0:
+            p.name = name + ".mean"
+            p.value = self.sums / self.samples
+            p.display()
+
+            p.name = name + ".stdev"
+            if self.samples > 1:
+                # sample variance computed from the running sums
+                var = (self.samples * self.squares - self.sums ** 2) \
+                      / (self.samples * (self.samples - 1))
+                if var >= 0:
+                    p.value = math.sqrt(var)
+                else:
+                    p.value = 'NaN'
+            else:
+                p.value = 0.0
+            p.display()
+
+        p.name = name + ".samples"
+        p.value = self.samples
+        p.display()
+
+    def comparable(self, other):
+        return True
+
+    def __eq__(self, other):
+        return self.sums == other.sums and self.squares == other.squares and \
+               self.samples == other.samples
+
+    def __isub__(self, other):
+        self.sums -= other.sums
+        self.squares -= other.squares
+        self.samples -= other.samples
+        return self
+
+    def __iadd__(self, other):
+        self.sums += other.sums
+        self.squares += other.squares
+        self.samples += other.samples
+        return self
+
+    def __itruediv__(self, other):
+        # a zero (or otherwise false) divisor leaves everything untouched
+        if not other:
+            return self
+        self.sums /= other
+        self.squares /= other
+        self.samples /= other
+        return self
+
+class FullDist(SimpleDist):
+    '''SimpleDist plus the bucket counts of the distribution.
+
+    vec holds one count per bucket, under and over the counts that fell
+    outside the buckets.  display() labels the buckets starting at min in
+    steps of bsize, the last one ending at max.  getValue() and __eq__()
+    are inherited from SimpleDist; the copies that used to be here were
+    identical.
+    '''
+    def __init__(self, sums, squares, samples, minval, maxval,
+                 under, vec, over, min, max, bsize, size):
+        SimpleDist.__init__(self, sums, squares, samples)
+        self.minval = minval
+        self.maxval = maxval
+        self.under = under
+        self.vec = vec
+        self.over = over
+        self.min = min
+        self.max = max
+        self.bsize = bsize
+        self.size = size
+
+    def display(self, name, desc, flags, precision):
+        import display
+        p = display.Print()
+        p.flags = flags
+        p.precision = precision
+
+        p.name = name + '.min_val'
+        p.value = self.minval
+        p.display()
+
+        p.name = name + '.max_val'
+        p.value = self.maxval
+        p.display()
+
+        p.name = name + '.underflow'
+        p.value = self.under
+        p.display()
+
+        # every bucket but the last spans bsize values
+        i = self.min
+        for val in self.vec[:-1]:
+            p.name = name + '[%d:%d]' % (i, i + self.bsize - 1)
+            p.value = val
+            p.display()
+            i += self.bsize
+
+        # the last bucket ends at max
+        p.name = name + '[%d:%d]' % (i, self.max)
+        p.value = self.vec[-1]
+        p.display()
+
+        p.name = name + '.overflow'
+        p.value = self.over
+        p.display()
+
+        SimpleDist.display(self, name, desc, flags, precision)
+
+    def comparable(self, other):
+        return self.min == other.min and self.max == other.max and \
+               self.bsize == other.bsize and self.size == other.size
+
+    def __isub__(self, other):
+        self.sums -= other.sums
+        self.squares -= other.squares
+        self.samples -= other.samples
+
+        if other.samples:
+            self.minval = min(self.minval, other.minval)
+            self.maxval = max(self.maxval, other.maxval)
+            # these two lines read the undefined names 'under' and 'over'
+            # and raised NameError
+            self.under -= other.under
+            self.vec = map(lambda x,y: x - y, self.vec, other.vec)
+            self.over -= other.over
+        return self
+
+    def __iadd__(self, other):
+        # Adding to an empty distribution hands back the other object
+        # itself, so 'a += b' leaves a and b as the same object.
+        if not self.samples and other.samples:
+            return other
+
+        self.sums += other.sums
+        self.squares += other.squares
+        self.samples += other.samples
+
+        if other.samples:
+            self.minval = min(self.minval, other.minval)
+            self.maxval = max(self.maxval, other.maxval)
+            self.under += other.under
+            self.vec = map(lambda x,y: x + y, self.vec, other.vec)
+            self.over += other.over
+        return self
+
+    def __itruediv__(self, other):
+        # a zero (or otherwise false) divisor leaves everything untouched
+        if not other:
+            return self
+        self.sums /= other
+        self.squares /= other
+        self.samples /= other
+
+        if self.samples:
+            self.under /= other
+            for i in xrange(len(self.vec)):
+                self.vec[i] /= other
+            self.over /= other
+        return self
+
+class Dist(Statistic):
+    '''Statistic that wraps one distribution object in self.dist and
+    forwards display, comparison and in-place arithmetic to it.'''
+    def getValue(self):
+        return 0.0
+
+    def display(self):
+        import display
+        # NOTE(review): 'flags' is not imported in the visible part of
+        # this module -- confirm.
+        if not display.all and not (self.flags & flags.printable):
+            return
+
+        self.dist.display(self.name, self.desc, self.flags, self.precision)
+
+    def comparable(self, other):
+        # the method is spelled 'comparable' on the distribution classes;
+        # the old 'compareable' raised AttributeError
+        return self.name == other.name and \
+               self.dist.comparable(other.dist)
+
+    def __eq__(self, other):
+        return self.dist == other.dist
+
+    def __isub__(self, other):
+        self.dist -= other.dist
+        return self
+
+    def __iadd__(self, other):
+        self.dist += other.dist
+        return self
+
+    def __itruediv__(self, other):
+        # a zero (or otherwise false) divisor leaves the dist untouched
+        if not other:
+            return self
+        self.dist /= other
+        return self
+
+class VectorDist(Statistic):
+    '''Statistic that holds a sequence of distributions in self.dist.'''
+    def getValue(self):
+        return 0.0
+
+    def display(self):
+        '''Print every distribution under its sub-name (or index),
+        followed by a total over all of them.'''
+        import display
+        # NOTE(review): 'flags' is not imported in the visible part of
+        # this module -- confirm.
+        if not display.all and not (self.flags & flags.printable):
+            return
+
+        # a single distribution instead of a sequence prints nothing
+        if isinstance(self.dist, SimpleDist):
+            return
+
+        for dist,sn,sd,i in map(None, self.dist, self.subnames, self.subdescs,
+                                range(len(self.dist))):
+            if len(sn) > 0:
+                name = '%s.%s' % (self.name, sn)
+            else:
+                name = '%s[%d]' % (self.name, i)
+
+            if len(sd) > 0:
+                desc = sd
+            else:
+                desc = self.desc
+
+            dist.display(name, desc, self.flags, self.precision)
+
+        # NOTE(review): the 'or 1' makes this unconditional -- confirm
+        # whether the total flag is meant to be honoured.
+        if (self.flags & flags.total) or 1:
+            dists = self.dist
+            first = dists[0]
+            if isinstance(first, SimpleDist):
+                disttotal = SimpleDist( \
+                    reduce(sums, [d.sums for d in dists]),
+                    reduce(sums, [d.squares for d in dists]),
+                    reduce(sums, [d.samples for d in dists]))
+            else:
+                # The bucket layout is taken from the first distribution.
+                # This used to index the loop variable 'dist' instead of
+                # self.dist.
+                disttotal = FullDist( \
+                    reduce(sums, [d.sums for d in dists]),
+                    reduce(sums, [d.squares for d in dists]),
+                    reduce(sums, [d.samples for d in dists]),
+                    min([d.minval for d in dists]),
+                    max([d.maxval for d in dists]),
+                    reduce(sums, [d.under for d in dists]),
+                    reduce(sums, [d.vec for d in dists]),
+                    reduce(sums, [d.over for d in dists]),
+                    first.min,
+                    first.max,
+                    first.bsize,
+                    first.size)
+
+            name = '%s.total' % (self.name)
+            desc = self.desc
+            disttotal.display(name, desc, self.flags, self.precision)
+
+    def comparable(self, other):
+        return self.name == other.name and \
+               alltrue(map(lambda x, y : x.comparable(y),
+                           self.dist,
+                           other.dist))
+
+    def __eq__(self, other):
+        return alltrue(map(lambda x, y : x == y, self.dist, other.dist))
+
+    def __isub__(self, other):
+        if issequence(self.dist) and issequence(other.dist):
+            for sd,od in zip(self.dist, other.dist):
+                sd -= od
+        else:
+            self.dist -= other.dist
+        return self
+
+    def __iadd__(self, other):
+        if issequence(self.dist) and issequence(other.dist):
+            # NOTE(review): 'sd += od' only rebinds the loop variable, so
+            # a new object returned by an element's __iadd__ is not
+            # stored back into self.dist -- confirm this is acceptable.
+            for sd,od in zip(self.dist, other.dist):
+                sd += od
+        else:
+            self.dist += other.dist
+        return self
+
+    def __itruediv__(self, other):
+        # a zero (or otherwise false) divisor leaves the dists untouched
+        if not other:
+            return self
+        if issequence(self.dist):
+            for dist in self.dist:
+                dist /= other
+        else:
+            self.dist /= other
+        return self
+
+class Vector2d(Statistic):
+    '''Statistic printed as self.x rows of self.y values each; row x
+    occupies value[x * self.y : (x + 1) * self.y].'''
+    def getValue(self):
+        return 0.0
+
+    def display(self):
+        '''Print one vector per row and, when the total flag is set, a
+        vector of per-column totals.'''
+        import display
+        # NOTE(review): 'flags' is not imported in the visible part of
+        # this module -- confirm.
+        if not display.all and not (self.flags & flags.printable):
+            return
+
+        d = display.VectorDisplay()
+        d.__dict__.update(self.__dict__)
+
+        if self.__dict__.has_key('ysubnames'):
+            # pad the row names with empty strings up to self.x entries
+            ysubnames = list(self.ysubnames)
+            slack = self.x - len(ysubnames)
+            if slack > 0:
+                ysubnames.extend(['']*slack)
+        else:
+            ysubnames = range(self.x)
+
+        for x,sname in enumerate(ysubnames):
+            o = x * self.y
+            d.value = self.value[o:o+self.y]
+            d.name = '%s[%s]' % (self.name, sname)
+            d.display()
+
+        if self.flags & flags.total:
+            d.value = []
+            for y in range(self.y):
+                xtot = 0.0
+                for x in range(self.x):
+                    # The row stride is self.y, as in the loop above.
+                    # The old 'x * self.x' picked the wrong elements
+                    # whenever self.x != self.y.
+                    xtot += self.value[y + x * self.y]
+                d.value.append(xtot)
+
+            d.name = self.name + '.total'
+            d.display()
+
+    def comparable(self, other):
+        return self.name == other.name and self.x == other.x and \
+               self.y == other.y
+
+    # Comparison and in-place arithmetic are placeholders: every
+    # Vector2d compares equal and the operators change nothing.
+    def __eq__(self, other):
+        return True
+
+    def __isub__(self, other):
+        return self
+
+    def __iadd__(self, other):
+        return self
+
+    def __itruediv__(self, other):
+        if not other:
+            return self
+        return self
+
+def NewStat(data):
+    '''Build the statistic object that matches data.type.
+
+    Returns None when the type is not one of the names listed below.
+    '''
+    classes = {
+        'SCALAR'     : Scalar,
+        'VECTOR'     : Vector,
+        'DIST'       : Dist,
+        'VECTORDIST' : VectorDist,
+        'VECTOR2D'   : Vector2d,
+        'FORMULA'    : Formula,
+    }
+
+    cls = classes.get(data.type)
+    if cls is None:
+        return None
+    return cls(data)
+
diff --git a/util/stats/print.py b/util/stats/print.py

new file mode 100644 (file)

index 0000000..f4492cd
--- /dev/null
+++ b/util/stats/print.py
@@ -0,0 +1,127 @@
+# Module-wide display switches.
+# NOTE(review): nothing in this file reads 'all' (doprint() and
+# VectorDisplay test 'display_all' instead) -- confirm which name is
+# intended.
+all = False
+# When true, Print.__str__ appends ' # <desc>' to the line.
+descriptions = False
+
+class Value:
+    '''Formats one statistic value for printing.
+
+    value is a number or a string such as 'nan' or 'inf'.  precision is
+    the number of decimals; a negative precision picks "%.0f" for whole
+    numbers and "%f" otherwise.  percent multiplies numbers by 100 and
+    appends '%' to the result.
+    '''
+    def __init__(self, value, precision, percent = False):
+        self.value = value
+        self.precision = precision
+        self.percent = percent
+
+    def __str__(self):
+        if isinstance(self.value, str):
+            # Normalize the spelling of the special values.  Any other
+            # string is passed through unchanged; it used to leave the
+            # local unbound and raise UnboundLocalError.
+            special = { 'nan' : 'NaN', 'inf' : 'Inf' }
+            value = special.get(self.value.lower(), self.value)
+        else:
+            if self.precision >= 0:
+                fmt = "%%.%df" % self.precision
+            elif self.value == 0.0 or self.value % 1.0 == 0.0:
+                fmt = "%.0f"
+            else:
+                fmt = "%f"
+            value = self.value
+            if self.percent:
+                value = value * 100.0
+            value = fmt % value
+
+        if self.percent:
+            value = value + "%"
+
+        return value
+
+class Print:
+    '''One line of statistics output.
+
+    The attributes read here (name, value, precision, flags and the
+    optional desc, pdf and cdf) are passed as keyword arguments or
+    assigned by the caller afterwards.
+    '''
+    def __init__(self, **vals):
+        self.__dict__.update(vals)
+
+    def __str__(self):
+        attrs = self.__dict__
+        value = Value(self.value, self.precision)
+
+        # pdf/cdf columns stay empty unless the caller supplied them
+        pdf = ''
+        if 'pdf' in attrs:
+            pdf = Value(self.pdf, 2, True)
+        cdf = ''
+        if 'cdf' in attrs:
+            cdf = Value(self.cdf, 2, True)
+
+        output = "%-40s %12s %8s %8s" % (self.name, value, pdf, cdf)
+
+        if descriptions and 'desc' in attrs and self.desc:
+            output = "%s # %s" % (output, self.desc)
+
+        return output
+
+    def doprint(self):
+        '''Return False when the flags ask for this value to be hidden.'''
+        # NOTE(review): display_all, flags_nozero and flags_nonan are not
+        # defined in this module -- confirm the intended names.
+        if display_all:
+            return True
+        if self.value == 0.0 and (self.flags & flags_nozero):
+            return False
+        hidden_nan = isinstance(self.value, str) and \
+                     self.value == 'NaN' and (self.flags & flags_nonan)
+        return not hidden_nan
+
+    def display(self):
+        if not self.doprint():
+            return
+        print self
+
+class VectorDisplay:
+    '''Prints a single value or a vector of values through Print.
+
+    The caller assigns name, desc, value, flags and precision (and
+    optionally subnames and subdescs) before calling display().
+    '''
+    def display(self):
+        # NOTE(review): issequence, display_all, flags_pdf and flags_total
+        # are not defined in this module -- confirm where they come from.
+        p = Print(flags = self.flags, precision = self.precision)
+
+        if not issequence(self.value):
+            # plain value: a single line
+            p.name = self.name
+            p.desc = self.desc
+            p.value = self.value
+            p.display()
+            return
+
+        value = self.value
+        count = len(value)
+        if not count:
+            return
+
+        mytotal = reduce(lambda x,y: float(x) + float(y), value)
+        mycdf = 0.0
+
+        # Elements whose sub-name stays empty are skipped by the loop
+        # below, so unnamed elements are only printed under display_all.
+        if display_all:
+            subnames = [ '[%d]' % i for i in range(count) ]
+        else:
+            subnames = [''] * count
+
+        if 'subnames' in self.__dict__:
+            for i,each in enumerate(self.subnames):
+                if len(each) > 0:
+                    subnames[i] = '.%s' % each
+
+        # every element starts with the vector's description; the given
+        # sub-descriptions replace the leading ones
+        subdescs = [self.desc] * count
+        if 'subdescs' in self.__dict__:
+            n = min(count, len(self.subdescs))
+            subdescs[:n] = list(self.subdescs[:n])
+
+        for val,sname,sdesc in zip(value, subnames, subdescs):
+            # the cdf accumulates over skipped elements as well
+            if mytotal > 0.0:
+                mypdf = float(val) / float(mytotal)
+                mycdf += mypdf
+                if (self.flags & flags_pdf):
+                    p.pdf = mypdf
+                    p.cdf = mycdf
+
+            if len(sname) > 0:
+                p.name = self.name + sname
+                p.desc = sdesc
+                p.value = val
+                p.display()
+
+        if (self.flags & flags_total):
+            # the total line carries no pdf/cdf columns
+            for key in ('pdf', 'cdf'):
+                if key in p.__dict__:
+                    del p.__dict__[key]
+            p.name = self.name + '.total'
+            p.desc = self.desc
+            p.value = mytotal
+            p.display()
+
+
diff --git a/util/stats/stats.py b/util/stats/stats.py

new file mode 100755 (executable)

index 0000000..1d521fd
--- /dev/null
+++ b/util/stats/stats.py
@@ -0,0 +1,478 @@
+#!/usr/bin/env python
+from __future__ import division
+import re, sys
+
+def usage():
+    """Print the command-line synopsis to stdout and exit with status 1.
+
+    The flag list mirrors the option string handed to getopts() by the
+    script entry point ('-BEFGd:g:h:pr:s:u:'), including -B and -G.
+    """
+    print '''\
+Usage: %s [-B] [-E] [-F] [-G] [-d <db> ] [-g <get> ] [-h <host>] [-p]
+       [-s <system>] [-r <runs> ] [-u <username>] <command> [command args]
+''' % sys.argv[0]
+    sys.exit(1)
+
+def getopts(list, flags):
+    """Parse the argument vector ``list`` against the getopt spec ``flags``.
+
+    Returns the (opts, args) pair produced by getopt.getopt().  When the
+    arguments do not match ``flags``, usage() is called, which prints the
+    synopsis and exits.
+    """
+    import getopt
+    try:
+        return getopt.getopt(list, flags)
+    except getopt.GetoptError:
+        usage()
+
+def printval(name, value, invert = False):
+    """Print one 'name:    value' line using the format in printval.mode.
+
+    name   -- label printed before the colon
+    value  -- number to print
+    invert -- when true and value is non-zero, print 1 / value instead
+
+    Nothing is printed when the (possibly inverted) value equals
+    1e300*1e300, i.e. a product that overflows to positive infinity.
+    Mode 'G' uses %g; mode 'F' always uses %f; any other mode uses
+    %0.5e for values above 1e6 and %f otherwise.
+    """
+    if invert and value != 0.0:
+        value = 1 / value
+
+    if value == (1e300*1e300):
+        return
+
+    # Choose the format first so there is a single print statement.
+    mode = printval.mode
+    if mode == 'G':
+        fmt = '%s:    %g'
+    elif mode != 'F' and value > 1e6:
+        fmt = '%s:    %0.5e'
+    else:
+        fmt = '%s:    %f'
+
+    print fmt % (name, value)
+
+# Default output format; the script entry point switches it for -E / -F.
+printval.mode = 'G'
+
+def unique(list):
+    """Return a list of the distinct items in ``list``.
+
+    Items must be hashable.  The order of the result is the dictionary's
+    key order and should not be relied upon.
+    """
+    # dict.fromkeys() maps every item to None, collapsing duplicates.  The
+    # previous map(set.__setitem__, list, []) form depended on map() padding
+    # the shorter sequence with None and shadowed the name ``set``.
+    return dict.fromkeys(list).keys()
+
+def graphdata(runs, tag, label, value):
+    """Write one graph data file per benchmark/dma/cache/system/checkpoint.
+
+    runs  -- accepted for symmetry with printdata(); not used here, the
+             runs are looked up by name in info.source.allRunNames
+    tag   -- prefix of each output file name
+    label -- text written as the y-axis label ('#set ylbl')
+    value -- object converted with float() once per run
+
+    Each file gets a small '#set' header followed by one row per clock
+    speed ('4GHz', '10GHz') holding one number per configuration.  A
+    value equal to 1e300*1e300 (infinity) is written as 0.0.
+
+    Raises KeyError if an expected run name is missing from
+    info.source.allRunNames.
+    """
+    import info
+    configs = ['std', 'csa', 'ht1', 'ht4', 'htx', 'ocm', 'occ', 'ocp' ]
+    benchmarks = [ 'm', 's' ]
+    dmas = [ 'x', 'd', 'b' ]
+    caches = [ '1', '2', '3', '4', '5' ]
+    systems = [ 'M' ]
+    checkpoints = [ '1' ]
+
+    # 'system' rather than 'sys' so the loop variable does not shadow the
+    # sys module imported at the top of the file.
+    names = []
+    for bench in benchmarks:
+        for dma in dmas:
+            for cache in caches:
+                for system in systems:
+                    for cpt in checkpoints:
+                        names.append([bench, dma, cache, system, cpt])
+
+    for bench,dma,cache,system,cpt in names:
+        base = '%s.%s.%s.%s.%s' % (bench, dma, cache, system, cpt)
+        fname = '/n/ziff/z/binkertn/graph/data.ibm/%s.%s.dat' % (tag, base)
+        f = open(fname, 'w')
+        # try/finally so the file is closed even when a run lookup or the
+        # float() conversion raises part-way through.
+        try:
+            print >>f, '#set TITLE = %s' % base
+            print >>f, '#set xlbl = Configuration'
+            print >>f, '#set ylbl = %s' % label
+            print >>f, '#set sublabels = %s' % ' '.join(configs)
+
+            for speed,freq in zip(['s', 'q'],['4GHz','10GHz']):
+                print >>f, '"%s"' % freq,
+                for conf in configs:
+                    name = '%s.%s.%s.%s.%s.%s.%s' % (conf, bench, dma, speed,
+                                                     cache, system, cpt)
+                    run = info.source.allRunNames[name]
+                    # display_run is set before every conversion; presumably
+                    # float(value) reads it to pick the run -- TODO confirm.
+                    info.display_run = run.run
+                    val = float(value)
+                    if val == 1e300*1e300:
+                        print >>f, 0.0,
+                    else:
+                        print >>f, "%f" % val,
+                print >>f
+        finally:
+            f.close()
+
+def printdata(runs, value, invert = False):
+    """Print ``value`` once per run, labelled with the run's name.
+
+    runs   -- iterable of run objects providing .run and .name
+    value  -- object converted with float() once per run
+    invert -- forwarded to printval(), which prints 1 / value for
+              non-zero values when it is true
+    """
+    import info
+    for run in runs:
+        # display_run is set before every conversion; presumably
+        # float(value) reads it to pick the run -- TODO confirm.
+        info.display_run = run.run
+        val = float(value)
+        # Forward invert: it used to be dropped here, so the inverse
+        # commands (tpp, tpb) printed the non-inverted number.
+        printval(run.name, val, invert)
+
+class CommandException(Exception):
+    """Raised by commands() for an unknown command or bad arguments."""
+
+def commands(options, command, args):
+    """Run one stats command.
+
+    options -- object with host, db, passwd, user, system, get and runs
+               attributes (see the script entry point)
+    command -- command name, e.g. 'database', 'runs', 'stat', 'bps'
+    args    -- remaining command-line arguments for the command
+
+    'database' (drop [init] | init | clean) is handled through dbinit and
+    returns before any statistics source is opened.  Every other command
+    connects info.source, selects the runs whose names match the
+    options.runs regular expression (all runs when it is None) and then
+    lists, prints or graphs data.  The module-level flags ``graph`` and
+    ``binned`` choose graph-file output and per-bin breakdowns.
+
+    Raises CommandException for an unknown command or a wrong number of
+    arguments.
+    """
+    if command == 'database':
+        if len(args) == 0: raise CommandException
+
+        import dbinit
+        mydb = dbinit.MyDB(options)
+
+        if args[0] == 'drop':
+            if len(args) > 2: raise CommandException
+            mydb.admin()
+            mydb.drop()
+            if len(args) == 2 and args[1] == 'init':
+                mydb.create()
+                mydb.connect()
+                mydb.populate()
+            mydb.close()
+            return
+
+        if args[0] == 'init':
+            if len(args) > 1: raise CommandException
+            mydb.admin()
+            mydb.create()
+            mydb.connect()
+            mydb.populate()
+            mydb.close()
+            return
+
+        if args[0] == 'clean':
+            if len(args) > 1: raise CommandException
+            mydb.connect()
+            mydb.clean()
+            return
+
+        raise CommandException
+
+    import db, info
+    info.source = db.Database()
+    info.source.host = options.host
+    info.source.db = options.db
+    info.source.passwd = options.passwd
+    info.source.user = options.user
+    info.source.connect()
+    # Presumably publishes names such as sim_ticks and sim_seconds, used
+    # below, as module globals -- TODO confirm against db.Database.
+    info.source.update_dict(globals())
+
+    system = info.source.__dict__[options.system]
+
+    if type(options.get) is str:
+        info.source.get = options.get
+
+    if options.runs is None:
+        runs = info.source.allRuns
+    else:
+        rx = re.compile(options.runs)
+        runs = []
+        for run in info.source.allRuns:
+            if rx.match(run.name):
+                runs.append(run)
+
+    info.display_run = runs[0].run
+
+    if command == 'runs':
+        user = None
+        opts, args = getopts(args, '-u')
+        if len(args):
+            raise CommandException
+        for o,a in opts:
+            if o == '-u':
+                user = a
+        info.source.listRuns(user)
+        return
+
+    if command == 'stats':
+        if len(args) == 0:
+            info.source.listStats()
+        elif len(args) == 1:
+            info.source.listStats(args[0])
+        else:
+            raise CommandException
+
+        return
+
+    if command == 'stat':
+        if len(args) != 1:
+            raise CommandException
+
+        stats = info.source.getStat(args[0])
+        for stat in stats:
+            if graph:
+                graphdata(runs, stat.name, stat.name, stat)
+            else:
+                print stat.name
+                printdata(runs, stat)
+        return
+
+    if command == 'bins':
+        if len(args) == 0:
+            info.source.listBins()
+        elif len(args) == 1:
+            info.source.listBins(args[0])
+        else:
+            raise CommandException
+
+        return
+
+    if command == 'formulas':
+        if len(args) == 0:
+            info.source.listFormulas()
+        elif len(args) == 1:
+            info.source.listFormulas(args[0])
+        else:
+            raise CommandException
+
+        return
+
+    if command == 'samples':
+        if len(args):
+            raise CommandException
+
+        info.source.listTicks(runs)
+        return
+
+    # None of the remaining commands takes arguments.
+    if len(args):
+        raise CommandException
+
+    if command == 'usertime':
+        import copy
+        kernel = copy.copy(system.full_cpu.numCycles)
+        kernel.bins = 'kernel'
+
+        user = copy.copy(system.full_cpu.numCycles)
+        user.bins = 'user'
+
+        if graph:
+            graphdata(runs, 'usertime', 'User Fraction',
+                      user / system.full_cpu.numCycles)
+        else:
+            printdata(runs, user / system.full_cpu.numCycles)
+        return
+
+    if command == 'ticks':
+        if binned:
+            print 'kernel ticks'
+            system.full_cpu.numCycles.bins = 'kernel'
+            printdata(runs, system.full_cpu.numCycles)
+
+            print 'idle ticks'
+            system.full_cpu.numCycles.bins = 'idle'
+            printdata(runs, system.full_cpu.numCycles)
+
+            print 'user ticks'
+            system.full_cpu.numCycles.bins = 'user'
+            printdata(runs, system.full_cpu.numCycles)
+
+            print 'total ticks'
+
+        system.full_cpu.numCycles.bins = None
+        printdata(runs, system.full_cpu.numCycles)
+        return
+
+    if command == 'packets':
+        packets = system.tsunami.nsgige.rxPackets
+        if graph:
+            graphdata(runs, 'packets', 'Packets', packets)
+        else:
+            printdata(runs, packets)
+        return
+
+    if command == 'ppt' or command == 'tpp':
+        ppt = system.tsunami.nsgige.rxPackets / sim_ticks
+        printdata(runs, ppt, command == 'tpp')
+        return
+
+    if command == 'pps':
+        pps = system.tsunami.nsgige.rxPackets / sim_seconds
+        if graph:
+            graphdata(runs, 'pps', 'Packets/s', pps)
+        else:
+            printdata(runs, pps)
+        return
+
+    if command == 'bpt' or command == 'tpb':
+        bytes = system.tsunami.nsgige.rxBytes + system.tsunami.nsgige.txBytes
+        bpt = bytes / sim_ticks * 8
+        if graph:
+            graphdata(runs, 'bpt', 'bps / Hz', bpt)
+        else:
+            printdata(runs, bpt, command == 'tpb')
+        return
+
+    if command == 'bptb' or command == 'tpbb':
+        bytes = system.tsunami.nsgige.rxBytes + system.tsunami.nsgige.txBytes
+
+        # NOTE(review): ``ticks`` is not defined in this function or at
+        # module level; it resolves only if update_dict(globals()) supplies
+        # it.  The other commands divide by sim_ticks -- confirm intent.
+        print 'kernel stats'
+        bytes.bins = 'kernel'
+        printdata(runs, bytes / ticks)
+
+        print 'idle stats'
+        bytes.bins = 'idle'
+        printdata(runs, bytes / ticks)
+
+        print 'user stats'
+        bytes.bins = 'user'
+        printdata(runs, bytes / ticks)
+
+        return
+
+    if command == 'bytes':
+        stat = system.tsunami.nsgige.rxBytes + system.tsunami.nsgige.txBytes
+
+        if binned:
+            print '%s kernel stats' % stat.name
+            stat.bins = 'kernel'
+            printdata(runs, stat)
+
+            print '%s idle stats' % stat.name
+            stat.bins = 'idle'
+            printdata(runs, stat)
+
+            print '%s user stats' % stat.name
+            stat.bins = 'user'
+            printdata(runs, stat)
+
+            print '%s total stats' % stat.name
+            stat.bins = None
+
+        printdata(runs, stat)
+        return
+
+    if command == 'rxbps':
+        gbps = system.tsunami.nsgige.rxBandwidth / 1e9
+        if graph:
+            graphdata(runs, 'rxbps', 'Bandwidth (Gbps)',  gbps)
+        else:
+            printdata(runs, gbps)
+        return
+
+    if command == 'txbps':
+        gbps = system.tsunami.nsgige.txBandwidth / 1e9
+        if graph:
+            graphdata(runs, 'txbps', 'Bandwidth (Gbps)',  gbps)
+        else:
+            printdata(runs, gbps)
+        return
+
+    if command == 'bps':
+        rxbps = system.tsunami.nsgige.rxBandwidth
+        txbps = system.tsunami.nsgige.txBandwidth
+        gbps = (rxbps + txbps) / 1e9
+        if graph:
+            graphdata(runs, 'bps', 'Bandwidth (Gbps)',  gbps)
+        else:
+            printdata(runs, gbps)
+        return
+
+    if command == 'misses':
+        stat = system.L3.overall_mshr_misses
+        if binned:
+            print '%s kernel stats' % stat.name
+            stat.bins = 'kernel'
+            printdata(runs, stat)
+
+            print '%s idle stats' % stat.name
+            stat.bins = 'idle'
+            printdata(runs, stat)
+
+            print '%s user stats' % stat.name
+            stat.bins = 'user'
+            printdata(runs, stat)
+
+            print '%s total stats' % stat.name
+
+        stat.bins = None
+        if graph:
+            graphdata(runs, 'misses', 'Overall MSHR Misses', stat)
+        else:
+            printdata(runs, stat)
+        return
+
+    if command == 'mpkb':
+        misses = system.L3.overall_mshr_misses
+        rxbytes = system.tsunami.nsgige.rxBytes
+        txbytes = system.tsunami.nsgige.txBytes
+
+        if binned:
+            print 'mpkb kernel stats'
+            misses.bins = 'kernel'
+            mpkb = misses / ((rxbytes + txbytes) / 1024)
+            printdata(runs, mpkb)
+
+            print 'mpkb idle stats'
+            misses.bins = 'idle'
+            mpkb = misses / ((rxbytes + txbytes) / 1024)
+            printdata(runs, mpkb)
+
+            print 'mpkb user stats'
+            misses.bins = 'user'
+            mpkb = misses / ((rxbytes + txbytes) / 1024)
+            printdata(runs, mpkb)
+
+            print 'mpkb total stats'
+
+        mpkb = misses / ((rxbytes + txbytes) / 1024)
+        misses.bins = None
+        if graph:
+            graphdata(runs, 'mpkb', 'Misses / KB',  mpkb)
+        else:
+            printdata(runs, mpkb)
+        return
+
+    if command == 'execute':
+        printdata(runs, system.full_cpu.ISSUE__count)
+        return
+
+    if command == 'commit':
+        printdata(runs, system.full_cpu.COM__count)
+        return
+
+    if command == 'fetch':
+        printdata(runs, system.full_cpu.FETCH__count)
+        return
+
+    if command == 'rxbpp':
+        bpp = system.tsunami.nsgige.rxBytes / system.tsunami.nsgige.rxPackets
+        # Pass the selected run list; ``run`` was either undefined or a
+        # single leftover loop object, which printdata() cannot iterate.
+        printdata(runs, 8 * bpp)
+        return
+
+    if command == 'txbpp':
+        bpp = system.tsunami.nsgige.txBytes / system.tsunami.nsgige.txPackets
+        # Same fix as rxbpp: iterate the selected runs.
+        printdata(runs, 8 * bpp)
+        return
+
+    raise CommandException
+
+
+# Module-level flags read by commands(): ``graph`` selects graphdata()
+# output instead of printed values, ``binned`` adds the per-bin
+# (kernel / idle / user) breakdown for the commands that support it.
+graph = False
+binned = False
+
+# Plain attribute container for the command-line settings passed to
+# commands().
+class Options: pass
+
+if __name__ == '__main__':
+    # Script entry point: fill in default options, apply the command-line
+    # flags, then dispatch the first positional argument as the command.
+    import getpass
+
+    options = Options()
+    options.host = 'zizzer.pool'
+    options.db = None
+    options.passwd = ''
+    options.user = getpass.getuser()
+    options.runs = None
+    options.system = 'client'
+    options.get = None
+    # Defaults so these attributes exist even when -B / -G are not given.
+    options.binned = False
+    options.graph = False
+
+    opts, args = getopts(sys.argv[1:], '-BEFGd:g:h:pr:s:u:')
+    for o,a in opts:
+        if o == '-B':
+            options.binned = True
+            # commands() tests the module-level flag, so set it as well;
+            # previously only the options attribute changed and -B had
+            # no effect.
+            binned = True
+        if o == '-E':
+            printval.mode = 'E'
+        if o == '-F':
+            printval.mode = 'F'
+        if o == '-G':
+            options.graph = True
+            # Same as -B: commands() reads the module-level ``graph``.
+            graph = True
+        if o == '-d':
+            options.db = a
+        if o == '-g':
+            options.get = a
+        if o == '-h':
+            options.host = a
+        if o == '-p':
+            options.passwd = getpass.getpass()
+        if o == '-r':
+            options.runs = a
+        if o == '-u':
+            options.user = a
+        if o == '-s':
+            options.system = a
+
+    if len(args) == 0:
+        usage()
+
+    command = args[0]
+    args = args[1:]
+
+    try:
+        commands(options, command, args)
+    except CommandException:
+        usage()
author	Nathan Binkert <binkertn@umich.edu>
	Tue, 10 Aug 2004 01:20:52 +0000 (21:20 -0400)
committer	Nathan Binkert <binkertn@umich.edu>
	Tue, 10 Aug 2004 01:20:52 +0000 (21:20 -0400)
util/stats/db.py	[new file with mode: 0644]	patch \| blob
util/stats/dbinit.py	[new file with mode: 0644]	patch \| blob
util/stats/display.py	[new file with mode: 0644]	patch \| blob
util/stats/flags.py	[new file with mode: 0644]	patch \| blob
util/stats/info.py	[new file with mode: 0644]	patch \| blob
util/stats/print.py	[new file with mode: 0644]	patch \| blob
util/stats/stats.py	[new file with mode: 0755]	patch \| blob