From 62425b7a0724033bbd50170400ad2ff83ad57429 Mon Sep 17 00:00:00 2001 From: Nilay Vaish Date: Mon, 6 Aug 2012 18:14:32 -0500 Subject: [PATCH] Simulation.py: move code related to checkpointing to functions This patch moves the code related to checkpointing from the run() function to several different functions. The aim is to make the code more manageable. No functionality changes are expected, but since the code is kind of unruly, it is possible that some change might have crept in. --- configs/common/Simulation.py | 270 +++++++++++++++++++---------------- 1 file changed, 147 insertions(+), 123 deletions(-) diff --git a/configs/common/Simulation.py b/configs/common/Simulation.py index 7d2ba6735..72c99c20c 100644 --- a/configs/common/Simulation.py +++ b/configs/common/Simulation.py @@ -99,6 +99,143 @@ def setWorkCountOptions(system, options): if options.work_cpus_checkpoint_count != None: system.work_cpus_ckpt_count = options.work_cpus_checkpoint_count +def findCptDir(options, maxtick, cptdir, testsys): + """Figures out the directory from which the checkpointed state is read. + + There are two different ways in which the directories holding checkpoints + can be named -- + 1. cpt.<benchmark name>.<instruction count> + 2. cpt.<tick at which the checkpoint was taken> + + This function parses through the options to figure out which one of the + above should be used for selecting the checkpoint, and then figures out + the appropriate directory. + + It also sets the value of the maximum tick value till which the simulation + will run. 
+ """ + + from os.path import isdir, exists + from os import listdir + import re + + if not isdir(cptdir): + fatal("checkpoint dir %s does not exist!", cptdir) + + if options.at_instruction or options.simpoint: + inst = options.checkpoint_restore + if options.simpoint: + # assume workload 0 has the simpoint + if testsys.cpu[0].workload[0].simpoint == 0: + fatal('Unable to find simpoint') + inst += int(testsys.cpu[0].workload[0].simpoint) + + checkpoint_dir = joinpath(cptdir, "cpt.%s.%s" % (options.bench, inst)) + if not exists(checkpoint_dir): + fatal("Unable to find checkpoint directory %s", checkpoint_dir) + else: + dirs = listdir(cptdir) + expr = re.compile('cpt\.([0-9]*)') + cpts = [] + for dir in dirs: + match = expr.match(dir) + if match: + cpts.append(match.group(1)) + + cpts.sort(lambda a,b: cmp(long(a), long(b))) + + cpt_num = options.checkpoint_restore + if cpt_num > len(cpts): + fatal('Checkpoint %d not found', cpt_num) + + maxtick = maxtick - int(cpts[cpt_num - 1]) + checkpoint_dir = joinpath(cptdir, "cpt.%s" % cpts[cpt_num - 1]) + + return maxtick, checkpoint_dir + +def scriptCheckpoints(options, maxtick, cptdir): + if options.at_instruction or options.simpoint: + checkpoint_inst = int(options.take_checkpoints) + + # maintain correct offset if we restored from some instruction + if options.checkpoint_restore != None: + checkpoint_inst += options.checkpoint_restore + + print "Creating checkpoint at inst:%d" % (checkpoint_inst) + exit_event = m5.simulate() + exit_cause = exit_event.getCause() + print "exit cause = %s" % exit_cause + + # skip checkpoint instructions should they exist + while exit_cause == "checkpoint": + exit_event = m5.simulate() + exit_cause = exit_event.getCause() + + if exit_cause == "a thread reached the max instruction count": + m5.checkpoint(joinpath(cptdir, "cpt.%s.%d" % \ + (options.bench, checkpoint_inst))) + print "Checkpoint written." 
+ + else: + when, period = options.take_checkpoints.split(",", 1) + when = int(when) + period = int(period) + num_checkpoints = 0 + + exit_event = m5.simulate(when) + exit_cause = exit_event.getCause() + while exit_cause == "checkpoint": + exit_event = m5.simulate(when - m5.curTick()) + exit_cause = exit_event.getCause() + + if exit_cause == "simulate() limit reached": + m5.checkpoint(joinpath(cptdir, "cpt.%d")) + num_checkpoints += 1 + + sim_ticks = when + + while num_checkpoints < options.max_checkpoints and \ + exit_cause == "simulate() limit reached": + if (sim_ticks + period) > maxtick: + exit_event = m5.simulate(maxtick - sim_ticks) + exit_cause = exit_event.getCause() + break + else: + exit_event = m5.simulate(period) + exit_cause = exit_event.getCause() + sim_ticks += period + while exit_cause == "checkpoint": + exit_event = m5.simulate(sim_ticks - m5.curTick()) + exit_cause = exit_event.getCause() + if exit_cause == "simulate() limit reached": + m5.checkpoint(joinpath(cptdir, "cpt.%d")) + num_checkpoints += 1 + + return exit_cause + +def benchCheckpoints(options, maxtick, cptdir): + if options.fast_forward: + m5.stats.reset() + + print "**** REAL SIMULATION ****" + exit_event = m5.simulate(maxtick) + exit_cause = exit_event.getCause() + + num_checkpoints = 0 + max_checkpoints = options.max_checkpoints + + while exit_cause == "checkpoint": + m5.checkpoint(joinpath(cptdir, "cpt.%d")) + num_checkpoints += 1 + if num_checkpoints == max_checkpoints: + exit_cause = "maximum %d checkpoints dropped" % max_checkpoints + break + + exit_event = m5.simulate(maxtick - m5.curTick()) + exit_cause = exit_event.getCause() + + return exit_cause + def run(options, root, testsys, cpu_class): if options.maxtick: maxtick = options.maxtick @@ -123,7 +260,6 @@ def run(options, root, testsys, cpu_class): fatal("Must specify --caches when using --standard-switch") np = options.num_cpus - max_checkpoints = options.max_checkpoints switch_cpus = None if 
options.prog_interval: @@ -229,45 +365,7 @@ def run(options, root, testsys, cpu_class): checkpoint_dir = None if options.checkpoint_restore != None: - from os.path import isdir, exists - from os import listdir - import re - - if not isdir(cptdir): - fatal("checkpoint dir %s does not exist!", cptdir) - - if options.at_instruction or options.simpoint: - inst = options.checkpoint_restore - if options.simpoint: - # assume workload 0 has the simpoint - if testsys.cpu[0].workload[0].simpoint == 0: - fatal('Unable to find simpoint') - inst += int(testsys.cpu[0].workload[0].simpoint) - - checkpoint_dir = joinpath(cptdir, - "cpt.%s.%s" % (options.bench, inst)) - if not exists(checkpoint_dir): - fatal("Unable to find checkpoint directory %s", checkpoint_dir) - else: - dirs = listdir(cptdir) - expr = re.compile('cpt\.([0-9]*)') - cpts = [] - for dir in dirs: - match = expr.match(dir) - if match: - cpts.append(match.group(1)) - - cpts.sort(lambda a,b: cmp(long(a), long(b))) - - cpt_num = options.checkpoint_restore - - if cpt_num > len(cpts): - fatal('Checkpoint %d not found', cpt_num) - - ## Adjust max tick based on our starting tick - maxtick = maxtick - int(cpts[cpt_num - 1]) - checkpoint_dir = joinpath(cptdir, "cpt.%s" % cpts[cpt_num - 1]) - + maxtick, checkpoint_dir = findCptDir(options, maxtick, cptdir, testsys) m5.instantiate(checkpoint_dir) if options.standard_switch or cpu_class: @@ -310,9 +408,6 @@ def run(options, root, testsys, cpu_class): m5.switchCpus(switch_cpu_list1) m5.resume(testsys) - num_checkpoints = 0 - exit_cause = '' - # If we're taking and restoring checkpoints, use checkpoint_dir # option only for finding the checkpoints to restore from. This # lets us test checkpointing by restoring from one set of @@ -323,88 +418,17 @@ def run(options, root, testsys, cpu_class): else: cptdir = getcwd() - # Checkpoints being taken via the command line at <when> and at - # subsequent periods of <period>. 
Checkpoint instructions - # received from the benchmark running are ignored and skipped in - # favor of command line checkpoint instructions. if options.take_checkpoints != None : - if options.at_instruction or options.simpoint: - checkpoint_inst = int(options.take_checkpoints) - - # maintain correct offset if we restored from some instruction - if options.checkpoint_restore != None: - checkpoint_inst += options.checkpoint_restore - - print "Creating checkpoint at inst:%d" % (checkpoint_inst) - exit_event = m5.simulate() - print "exit cause = %s" % (exit_event.getCause()) - - # skip checkpoint instructions should they exist - while exit_event.getCause() == "checkpoint": - exit_event = m5.simulate() - - if exit_event.getCause() == \ - "a thread reached the max instruction count": - m5.checkpoint(joinpath(cptdir, "cpt.%s.%d" % \ - (options.bench, checkpoint_inst))) - print "Checkpoint written." - num_checkpoints += 1 - - if exit_event.getCause() == "user interrupt received": - exit_cause = exit_event.getCause(); - else: - when, period = options.take_checkpoints.split(",", 1) - when = int(when) - period = int(period) - - exit_event = m5.simulate(when) - while exit_event.getCause() == "checkpoint": - exit_event = m5.simulate(when - m5.curTick()) - - if exit_event.getCause() == "simulate() limit reached": - m5.checkpoint(joinpath(cptdir, "cpt.%d")) - num_checkpoints += 1 - - sim_ticks = when - exit_cause = "maximum %d checkpoints dropped" % max_checkpoints - while num_checkpoints < max_checkpoints and \ - exit_event.getCause() == "simulate() limit reached": - if (sim_ticks + period) > maxtick: - exit_event = m5.simulate(maxtick - sim_ticks) - exit_cause = exit_event.getCause() - break - else: - exit_event = m5.simulate(period) - sim_ticks += period - while exit_event.getCause() == "checkpoint": - exit_event = m5.simulate(sim_ticks - m5.curTick()) - if exit_event.getCause() == "simulate() limit reached": - m5.checkpoint(joinpath(cptdir, "cpt.%d")) - num_checkpoints += 1 
- - if exit_event.getCause() != "simulate() limit reached": - exit_cause = exit_event.getCause(); - - else: # no checkpoints being taken via this script - if options.fast_forward: - m5.stats.reset() - print "**** REAL SIMULATION ****" - exit_event = m5.simulate(maxtick) - - while exit_event.getCause() == "checkpoint": - m5.checkpoint(joinpath(cptdir, "cpt.%d")) - num_checkpoints += 1 - if num_checkpoints == max_checkpoints: - exit_cause = "maximum %d checkpoints dropped" % max_checkpoints - break - - exit_event = m5.simulate(maxtick - m5.curTick()) - exit_cause = exit_event.getCause() + # Checkpoints being taken via the command line at <when> and at + # subsequent periods of <period>. Checkpoint instructions + # received from the benchmark running are ignored and skipped in + # favor of command line checkpoint instructions. + exit_cause = scriptCheckpoints(options, maxtick, cptdir) + else: + # If checkpoints are being taken, then the checkpoint instruction + # will occur in the benchmark code itself. + exit_cause = benchCheckpoints(options, maxtick, cptdir) - if exit_cause == '': - exit_cause = exit_event.getCause() print 'Exiting @ tick %i because %s' % (m5.curTick(), exit_cause) - if options.checkpoint_at_end: m5.checkpoint(joinpath(cptdir, "cpt.%d")) - -- 2.30.2