util: update checkpoint aggregation script
author Nilay Vaish <nilay@cs.wisc.edu>
Tue, 3 Dec 2013 16:36:03 +0000 (10:36 -0600)
committer Nilay Vaish <nilay@cs.wisc.edu>
Tue, 3 Dec 2013 16:36:03 +0000 (10:36 -0600)
The checkpoint aggregation script had become outdated due to numerous changes
to checkpoints over the past couple of years.  This updates the script.  It
now supports aggregation for the x86 architecture instead of Alpha.  Also, a
couple of new options have been added that specify the size of the memory file
to be created and whether or not the memory file should be compressed.

util/checkpoint_aggregator.py

index 14659b954e3dddcb4be57ad1f2c8a6837f0a34b8..2e14609326925a736bb42866eba2751c23d47ffa 100755 (executable)
@@ -1,5 +1,6 @@
 # Copyright (c) 2009 The Regents of The University of Michigan
 # Copyright (c) 2011 Advanced Micro Devices, Inc.
+# Copyright (c) 2013 Mark D. Hill and David A. Wood
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #
 # Authors: Lisa Hsu
+#          Nilay Vaish
 
 from ConfigParser import ConfigParser
 import gzip
 
-import sys, re, optparse, os
+import sys, re, os
 
 class myCP(ConfigParser):
     def __init__(self):
@@ -39,75 +41,43 @@ class myCP(ConfigParser):
     def optionxform(self, optionstr):
         return optionstr
 
-def aggregate(options, args):
-    merged = myCP()
+def aggregate(output_dir, cpts, no_compress, memory_size):
+    merged_config = None
     page_ptr = 0
 
-    allfiles = os.listdir(os.getcwd())
-    cpts = []
-    for arg in args:
-        found = False
-        for f in allfiles:
-            if re.compile("cpt." + arg + ".\d+").search(f):
-                found = True
-                cpts.append(f)
-                break
-        if not found:
-            print "missing checkpoint: ", arg
-            sys.exit(1)
-
-    dirname = "-".join([options.prefix, "cpt"])
-    agg_name = "-".join(args)
-    print agg_name
-    fullpath = os.path.join("..", dirname, "cpt." + agg_name + ".10000")
-    if not os.path.isdir(fullpath):
-        os.system("mkdir -p " + fullpath)
-    elif os.path.isfile(fullpath + "/system.physmem.physmem"):
-        if os.path.isfile(fullpath + "/m5.cpt"):
-            print fullpath, " already done"
-            return
-
-    myfile = open(fullpath + "/system.physmem.physmem", "wb+")
-    merged_mem = gzip.GzipFile(fileobj=myfile, mode="wb")
+    output_path = output_dir
+    if not os.path.isdir(output_path):
+        os.system("mkdir -p " + output_path)
+
+    agg_mem_file = open(output_path + "/system.physmem.store0.pmem", "wb+")
+    agg_config_file = open(output_path + "/m5.cpt", "wb+")
+
+    if not no_compress:
+        merged_mem = gzip.GzipFile(fileobj= agg_mem_file, mode="wb")
 
     max_curtick = 0
-    when = 0
-    for (i, arg) in enumerate(args):
+    num_digits = len(str(len(cpts)-1))
+
+    for (i, arg) in enumerate(cpts):
         print arg
+        merged_config = myCP()
         config = myCP()
         config.readfp(open(cpts[i] + "/m5.cpt"))
 
         for sec in config.sections():
             if re.compile("cpu").search(sec):
-                newsec = re.sub("cpu", "cpu" + str(i), sec)
-                merged.add_section(newsec)
-                if re.compile("workload$").search(sec):
-                    merged.set(newsec, "M5_pid", i)
+                newsec = re.sub("cpu", "cpu" + str(i).zfill(num_digits), sec)
+                merged_config.add_section(newsec)
 
                 items = config.items(sec)
-                if options.alpha:
-                    for item in items:
-                        if item[0] == "ppn":
-                            if config.getint(sec, "tag") != 0:
-                                merged.set(newsec, item[0], int(item[1]) + page_ptr)
-                                continue
-                        elif item[0] == "asn":
-                            tmp = re.compile("(.*).Entry(\d+)").search(sec).groups()
-                            if config.has_option(tmp[0], "nlu"):
-                                size = config.getint(tmp[0], "nlu")
-                                if int(tmp[1]) < size:
-                                    merged.set(newsec, item[0], i)
-                                    continue
-                            else:
-                                merged.set(newsec, item[0], i)
-                                continue
-                        merged.set(newsec, item[0], item[1])
-                else:a #x86
-                    for item in items:
-                        if item[0] == "paddr":
-                            merged.set(newsec, item[0], int(item[1]) + (page_ptr << 12))
-                            continue
-                        merged.set(newsec, item[0], item[1])
+                for item in items:
+                    if item[0] == "paddr":
+                        merged_config.set(newsec, item[0], int(item[1]) + (page_ptr << 12))
+                        continue
+                    merged_config.set(newsec, item[0], item[1])
+
+                if re.compile("workload.FdMap256$").search(sec):
+                    merged_config.set(newsec, "M5_pid", i)
 
             elif sec == "system":
                 pass
@@ -115,68 +85,82 @@ def aggregate(options, args):
                 tick = config.getint(sec, "curTick")
                 if tick > max_curtick:
                     max_curtick = tick
-                    when = config.getint("system.cpu.tickEvent", "_when")
             else:
-                if i == 0:
-                    merged.add_section(sec)
+                if i == len(cpts)-1:
+                    merged_config.add_section(sec)
                     for item in config.items(sec):
-                        merged.set(sec, item[0], item[1])
-                        if item[0] == "curtick":
-                            merged.optionxform(str("curTick"))
-                        elif item[0] == "numevents":
-                            merged.optionxform(str("numEvents"))
+                        merged_config.set(sec, item[0], item[1])
 
-        page_ptr = page_ptr + int(config.get("system", "pagePtr"))
+        if i != len(cpts)-1:
+            merged_config.write(agg_config_file)
 
         ### memory stuff
-        f = open(cpts[i] + "/system.physmem.physmem", "rb")
-        gf = gzip.GzipFile(fileobj=f, mode="rb")
         pages = int(config.get("system", "pagePtr"))
+        page_ptr = page_ptr + pages
         print "pages to be read: ", pages
 
+        f = open(cpts[i] + "/system.physmem.store0.pmem", "rb")
+        gf = gzip.GzipFile(fileobj=f, mode="rb")
+
         x = 0
         while x < pages:
-            if options.alpha:
-                bytesRead = gf.read(1 << 13)
-            else: #x86
-                bytesRead = gf.read(1 << 12)
-            merged_mem.write(bytesRead)
+            bytesRead = gf.read(1 << 12)
+            if not no_compress:
+                merged_mem.write(bytesRead)
+            else:
+                agg_mem_file.write(bytesRead)
             x += 1
 
         gf.close()
         f.close()
 
-    merged.add_section("system")
-    merged.set("system", "pagePtr", page_ptr)
-    merged.set("system", "nextPID", len(args))
+    merged_config.add_section("system")
+    merged_config.set("system", "pagePtr", page_ptr)
+    merged_config.set("system", "nextPID", len(cpts))
+
+    file_size = page_ptr * 4 * 1024
+    dummy_data = "".zfill(4096)
+    while file_size < memory_size:
+        if not no_compress:
+            merged_mem.write(dummy_data)
+        else:
+            agg_mem_file.write(dummy_data)
+        file_size += 4 * 1024
+        page_ptr += 1
 
     print "WARNING: "
     print "Make sure the simulation using this checkpoint has at least ",
-    if options.alpha:
-        print page_ptr, "x 8K of memory"
-    else:  # assume x86
-        print page_ptr, "x 4K of memory"
+    print page_ptr, "x 4K of memory"
+    merged_config.set("system.physmem.store0", "range_size", page_ptr * 4 * 1024)
 
-    merged.add_section("Globals")
-    merged.set("Globals", "curTick", max_curtick)
+    merged_config.add_section("Globals")
+    merged_config.set("Globals", "curTick", max_curtick)
 
-    for i in xrange(len(args)):
-        merged.set("system.cpu" + str(i) + ".tickEvent", "_when", when)
+    merged_config.write(agg_config_file)
 
-    merged.write(file(fullpath + "/m5.cpt", "wb"))
-    merged_mem.close()
-    myfile.close()
+    if not no_compress:
+        merged_mem.close()
+        agg_mem_file.close()
+    else:
+        agg_mem_file.close()
 
 if __name__ == "__main__":
-
-    parser = optparse.OptionParser()
-    parser.add_option("--prefix", type="string", default="agg")
-    # If not alpha, then assume x86.  Any other ISAs would need
-    # extra stuff in this script to appropriately parse their page tables
-    # and understand page sizes.
-    parser.add_option("--alpha", action="store_true")
-
-    (options, args) = parser.parse_args()
-
-    aggregate(options, args)
-
+    from argparse import ArgumentParser
+    parser = ArgumentParser("usage: %prog [options] <directory names which "\
+                            "hold the checkpoints to be combined>")
+    parser.add_argument("-o", "--output-dir", action="store",
+                        help="Output directory")
+    parser.add_argument("-c", "--no-compress", action="store_true")
+    parser.add_argument("--cpts", nargs='+')
+    parser.add_argument("--memory-size", action="store", type=int)
+
+    # Assume x86 ISA.  Any other ISAs would need extra stuff in this script
+    # to appropriately parse their page tables and understand page sizes.
+    options = parser.parse_args()
+    print options.cpts, len(options.cpts)
+    if len(options.cpts) <= 1:
+        parser.error("You must specify atleast two checkpoint files that "\
+                     "need to be combined.")
+
+    aggregate(options.output_dir, options.cpts, options.no_compress,
+              options.memory_size)