From fc64ab81027b6787d7543a8f94dbc07c270e4ef1 Mon Sep 17 00:00:00 2001 From: Nathan Binkert Date: Sat, 5 Feb 2005 13:49:17 -0500 Subject: [PATCH] Make pbs submission scripts available to all. Fix up configuration scrupts to have better support for running on the simulation pool. --HG-- extra : convert_revision : 0178c8600b193d6c0ca69163fb735a7fa0e70782 --- util/pbs/job.py | 183 ++++++++++++++++++++++++++++++++++++++++++++ util/pbs/jobfile.py | 83 ++++++++++++++++++++ util/pbs/pbs.py | 126 ++++++++++++++++++++++++++++++ util/pbs/send.py | 169 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 561 insertions(+) create mode 100755 util/pbs/job.py create mode 100644 util/pbs/jobfile.py create mode 100755 util/pbs/pbs.py create mode 100755 util/pbs/send.py diff --git a/util/pbs/job.py b/util/pbs/job.py new file mode 100755 index 000000000..5eed0cd75 --- /dev/null +++ b/util/pbs/job.py @@ -0,0 +1,183 @@ +#!/usr/bin/env python +# Copyright (c) 2005 The Regents of The University of Michigan +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
# IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Authors: Nathan Binkert
#          Steve Reinhardt
#          Ali Saidi

import errno
import os, os.path, shutil, signal, socket, sys, time
from os import environ as env
from os.path import join as joinpath, expanduser


class rsync:
    """Build and run an rsync command line.

    Attributes (all set in __init__ and read by command()):
      sudo     -- prefix the command with 'sudo'
      rsync    -- name of the rsync executable
      compress -- pass '-z'
      archive  -- pass '-a'
      delete   -- pass '--delete'
      options  -- extra option(s): a string is passed as ONE argument
                  (original behavior); a list/tuple contributes one
                  argument per element
    """

    def __init__(self):
        self.sudo = False
        self.rsync = 'rsync'
        self.compress = False
        self.archive = True
        self.delete = False
        self.options = ''

    def command(self, src, dst):
        """Return the argv list that do() would execute for src -> dst."""
        args = []
        if self.sudo:
            args.append('sudo')

        args.append(self.rsync)
        if self.archive:
            args.append('-a')
        if self.compress:
            args.append('-z')
        if self.delete:
            args.append('--delete')
        if self.options:
            if isinstance(self.options, (list, tuple)):
                args.extend(self.options)
            else:
                args.append(self.options)
        args.append(src)
        args.append(dst)
        return args

    def do(self, src, dst):
        """Run rsync from src to dst, wait for it, and return its status."""
        args = self.command(src, dst)
        return os.spawnvp(os.P_WAIT, args[0], args)


def cleandir(dir):
    """Remove everything below directory `dir`, leaving `dir` itself.

    The tree is walked bottom-up so each directory is already empty when
    it is removed.
    """
    for root, dirs, files in os.walk(dir, False):
        for name in files:
            os.remove(joinpath(root, name))
        for name in dirs:
            path = joinpath(root, name)
            # os.walk reports a symlink that points at a directory in
            # `dirs`; rmdir() fails on it, so unlink the link instead.
            if os.path.islink(path):
                os.remove(path)
            else:
                os.rmdir(path)


def date():
    """Return the local time formatted like the output of date(1)."""
    return time.strftime('%a %b %e %H:%M:%S %Z %Y', time.localtime())


def remfile(file):
    """Delete `file` if it exists and is a regular file; else do nothing."""
    if os.path.isfile(file):
        os.unlink(file)


def readval(filename):
    """Return the first line of `filename` with surrounding whitespace
    stripped."""
    with open(filename, 'r') as f:
        return f.readline().strip()


def main():
    """Run one pool job under PBS.

    Reads PBS_JOBID, PBS_JOBNAME and USER from the environment (ROOTDIR
    defaults to the current directory), prepares a scratch work
    directory, runs <ROOTDIR>/Base/m5 with Base/run.mpy as a child whose
    stdout/stderr go to <jobdir>/output, and records progress in the
    marker files .start, .jobid, .host, .success/.failure and .stop
    inside the job directory.
    """
    rootdir = env.setdefault('ROOTDIR', os.getcwd())
    jobid = env['PBS_JOBID']
    jobname = env['PBS_JOBNAME']
    jobdir = joinpath(rootdir, jobname)
    basedir = joinpath(rootdir, 'Base')
    user = env['USER']

    env['POOLJOB'] = 'True'
    env['OUTPUT_DIR'] = jobdir
    env['JOBFILE'] = joinpath(basedir, 'test.py')
    env['JOBNAME'] = jobname

    def echofile(filename, string):
        """Write `string` plus a newline to <jobdir>/<filename>; exit the
        script if the file cannot be written."""
        try:
            with open(joinpath(jobdir, filename), 'w') as f:
                f.write('%s\n' % string)
        except IOError as e:
            sys.exit(str(e))

    if os.path.isdir("/work"):
        workbase = "/work"
    else:
        workbase = "/tmp/"

    workdir = joinpath(workbase, '%s.%s' % (user, jobid))

    os.umask(0o022)

    echofile('.start', date())
    echofile('.jobid', jobid)
    echofile('.host', socket.gethostname())

    if os.path.isdir(workdir):
        cleandir(workdir)
    else:
        os.mkdir(workdir)

    if os.path.isdir('/z/dist'):
        sync = rsync()
        sync.delete = True
        sync.sudo = True
        sync.do('poolfs::dist/m5/', '/z/dist/m5/')

    try:
        os.chdir(workdir)
    except OSError as e:
        sys.exit(str(e))

    os.symlink(joinpath(jobdir, 'output'), 'status.out')

    args = [ joinpath(basedir, 'm5'), joinpath(basedir, 'run.mpy') ]

    print('starting job... %s' % date())
    print(' '.join(args))
    print('')
    sys.stdout.flush()

    childpid = os.fork()
    if not childpid:
        # Child: send stdout/stderr to the job's output file and exec.
        try:
            sys.stdin.close()
            fd = os.open(joinpath(jobdir, "output"),
                         os.O_WRONLY | os.O_CREAT | os.O_TRUNC)
            os.dup2(fd, sys.stdout.fileno())
            os.dup2(fd, sys.stderr.fileno())
            os.execvp(args[0], args)
        except OSError as e:
            sys.stderr.write('%s: %s\n' % (args[0], e))
            sys.stderr.flush()
        # Only reached when setup or exec failed.  Leave immediately so
        # the child never runs the parent's bookkeeping below.
        os._exit(127)

    def handler(signum, frame):
        """Forward a signal received by this wrapper to the job."""
        if childpid != 0:
            os.kill(childpid, signum)

    # SIGSTOP is deliberately absent: like SIGKILL it can never be
    # caught, and signal.signal() raises when asked to handle it, which
    # used to abort this wrapper before it waited for the job.
    for signum in (signal.SIGHUP, signal.SIGINT, signal.SIGQUIT,
                   signal.SIGTERM, signal.SIGCONT, signal.SIGUSR1,
                   signal.SIGUSR2):
        signal.signal(signum, handler)

    while True:
        try:
            thepid, ec = os.waitpid(childpid, 0)
        except OSError as e:
            # A forwarded signal can interrupt the wait; retry in that
            # case only.  Any other error (e.g. ECHILD) is real and would
            # otherwise make this loop spin forever.
            if e.errno == errno.EINTR:
                continue
            raise
        break

    # `ec` is the raw wait status as returned by os.waitpid().
    if ec:
        print('Exit code  %s' % ec)
        echofile('.failure', date())
    else:
        echofile('.success', date())

    print('\njob complete... %s' % date())
    echofile('.stop', date())


if __name__ == '__main__':
    main()

# ---- patch header and license of the next file in this patch ----
# diff --git a/util/pbs/jobfile.py b/util/pbs/jobfile.py
# new file mode 100644
# index 000000000..570faa61b
# --- /dev/null
# +++ b/util/pbs/jobfile.py
# @@ -0,0 +1,83 @@
# Copyright (c) 2005 The Regents of The University of Michigan
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Authors: Nathan Binkert

import itertools
from os.path import expanduser


def crossproduct(options):
    """Yield every combination of one choice from each group in `options`.

    `options` is a sequence of groups, each group a sequence of choices.
    Each result is a new list holding one choice per group, in group
    order, with the last group varying fastest.  An empty `options`
    yields a single empty list; if any group is empty nothing is yielded
    (the previous hand-rolled odometer raised IndexError in that case).
    """
    for combination in itertools.product(*options):
        yield list(combination)


class JobFile(object):
    """The set of jobs described by a job description file.

    The file is Python source that must define two names:
      options     -- a sequence of option groups; every choice in a group
                     is a sequence whose first element is a name fragment
                     and whose remaining elements are (key, value) pairs
      environment -- an iterable of (key, value) pairs applied to every job

    Attributes:
      data        -- namespace the job file was executed in
      options     -- data['options']
      environment -- data['environment']
      jobs        -- job names, one per combination of choices, built by
                     joining the chosen name fragments with '.'
      jobinfo     -- maps job name to the list of (key, value) pairs
                     contributed by that job's choices
    """

    def __init__(self, file):
        self.data = {}
        filename = expanduser(file)
        with open(filename) as source:
            code = compile(source.read(), filename, 'exec')
        # NOTE: the job file is executed as Python code, so it must come
        # from a trusted source.
        exec(code, self.data)
        self.options = self.data['options']
        self.environment = self.data['environment']
        self.jobinfo = {}
        self.jobs = []
        for job in crossproduct(self.options):
            jobname = '.'.join([ choice[0] for choice in job ])
            self.jobs.append(jobname)
            self.jobinfo[jobname] = [ item
                                      for choice in job
                                      for item in choice[1:] ]

    def env(self, jobname):
        """Return the variables for `jobname` as a dict.

        Job-specific pairs are applied first, then the file-wide
        `environment` pairs, so the latter win on a key collision.
        """
        env = dict(self.jobinfo[jobname])
        env.update(self.environment)
        return env

    def printinfo(self, jobname):
        """Print `jobname` followed by its job-specific and file-wide
        key/value pairs."""
        print('%s:' % jobname)
        for key,val in self.jobinfo[jobname]:
            print(' %s = %s' % (key, val))

        for key,val in self.environment:
            print(' %s = %s' % (key, val))

# ---- patch header and start of the license of the next file ----
# diff --git a/util/pbs/pbs.py b/util/pbs/pbs.py
# new file mode 100755
# index 000000000..a71dbbf8e
# --- /dev/null
# +++ b/util/pbs/pbs.py
# @@ -0,0 +1,126 @@
# Copyright (c) 2005 The Regents of The University of Michigan
# All rights reserved.
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#
# Authors: Nathan Binkert

import os, re, sys

# Shell quoting for the ssh fallback: shlex.quote on Python 3,
# pipes.quote on Python 2.
try:
    from shlex import quote as _shell_quote
except ImportError:
    from pipes import quote as _shell_quote


def ssh(host, script, tty = False, user = ''):
    """Run `script` on `host` through ssh and return ssh's exit status.

    host   -- remote host name
    script -- command string handed to the remote shell
    tty    -- pass '-t' to ssh when true
    user   -- remote login name; ssh's default is used when empty
    """
    args = [ 'ssh', '-x' ]
    if user:
        args.append('-l' + user)
    if tty:
        args.append('-t')
    args.append(host)
    args.append(script)

    return os.spawnvp(os.P_WAIT, args[0], args)


class qsub:
    """Builder and runner for a PBS `qsub` command line.

    Set the attributes below, then call do(script).

      hold                      -- pass '-h'
      join                      -- pass '-joe'
      keep_stdout, keep_stderr  -- select '-ko', '-ke', '-koe' or '-kn'
      node_type                 -- '-lnodes=<node_type>' when non-empty
      mail_abort/_begin/_end    -- letters a/b/e of the '-m' option
      name                      -- '-N<name>' when non-empty
      stdout                    -- '-olocalhost:<stdout>' when non-empty
      priority                  -- '-p<priority>' when non-zero
      queue                     -- '-q<queue>' when non-empty
      pbshost                   -- host on which the command is retried
                                   through ssh if the local qsub fails
      qsub                      -- name of the qsub executable
      env                       -- dict passed as '-vK=V,K=V,...'
      onlyecho                  -- print the command, do not run it
      verbose                   -- print the command before running it
    """

    def __init__(self):
        self.hold = False
        self.join = False
        self.keep_stdout = False
        self.keep_stderr = False
        self.node_type = ''
        self.mail_abort = False
        self.mail_begin = False
        self.mail_end = False
        self.name = ''
        self.stdout = ''
        self.priority = 0
        self.queue = ''
        self.pbshost = ''
        self.qsub = 'qsub'
        self.env = {}
        self.onlyecho = False
        self.verbose = False

    def command(self, script):
        """Return the qsub argv list for submitting `script`."""
        args = [self.qsub]

        if self.env:
            pairs = [ '%s=%s' % item for item in self.env.items() ]
            args.append('-v' + ','.join(pairs))

        if self.hold:
            args.append('-h')

        if self.stdout:
            args.append('-olocalhost:' + self.stdout)

        if self.keep_stdout and self.keep_stderr:
            args.append('-koe')
        elif self.keep_stdout:
            args.append('-ko')
        elif self.keep_stderr:
            args.append('-ke')
        else:
            args.append('-kn')

        if self.join:
            args.append('-joe')

        if self.node_type:
            args.append('-lnodes=' + self.node_type)

        # Strings have no append(); build the flag letters by
        # concatenation and attach them to '-m' the same way every other
        # option here carries its value (no embedded space).
        flags = ''
        if self.mail_abort:
            flags += 'a'
        if self.mail_begin:
            flags += 'b'
        if self.mail_end:
            flags += 'e'
        if flags:
            args.append('-m' + flags)

        if self.name:
            args.append("-N%s" % self.name)

        if self.priority != 0:
            # priority is numeric, so it must be formatted, not added.
            args.append('-p%s' % self.priority)

        if self.queue:
            args.append('-q' + self.queue)

        args.append(script)
        return args

    def do(self, script, ):
        """Submit `script` and return the exit status.

        With onlyecho set the command is only printed to stderr and 0 is
        returned.  If the local qsub fails and pbshost is set, the same
        command is retried on pbshost through ssh.
        """
        args = self.command(script)

        if self.verbose or self.onlyecho:
            sys.stderr.write('PBS Command:  %s\n' % ' '.join(args))

        if self.onlyecho:
            return 0

        # No newline: the text is meant as a label for whatever qsub
        # prints next.  Flush so it is emitted before the child runs.
        sys.stderr.write('PBS Jobid: ')
        sys.stderr.flush()

        ec = os.spawnvp(os.P_WAIT, args[0], args)

        if ec != 0 and self.pbshost:
            # The remote shell re-splits the string, so quote each word.
            remote = ' '.join([ _shell_quote(arg) for arg in args ])
            ec = ssh(self.pbshost, remote)

        return ec

# ---- patch header and license of the next file in this patch ----
# diff --git a/util/pbs/send.py b/util/pbs/send.py
# new file mode 100755
# index 000000000..c0c56d98b
# --- /dev/null
# +++ b/util/pbs/send.py
# @@ -0,0 +1,169 @@
#!/usr/bin/env python
# Copyright (c) 2005 The Regents of The University of Michigan
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Authors: Ali Saidi
#          Nathan Binkert

import getopt
import os, os.path, re, sys
from os import environ as env, listdir
from os.path import basename, isdir, isfile, islink, join as joinpath
from filecmp import cmp as filecmp
from shutil import copyfile

progname = basename(sys.argv[0])
usage = """\
Usage:
    %(progname)s [-c] [-e] [-f] [-q queue] [-v] [regexp ...]
        -c         clean directory if job can be run
        -e         only echo pbs command info, don't actually send the job
        -f         force the job to run regardless of state
        -q         submit job to the named queue
        -v         be verbose
        regexp     only consider jobs whose name matches one of these

    %(progname)s -l [-v] [regexp ...]
        -l         list job names, don't submit
        -v         be verbose (list job parameters)

    %(progname)s -h
        -h         display this help
""" % {'progname': progname}


def parse_args(argv):
    """Parse the command line `argv` (without the program name).

    Returns (opts, exprs): `opts` is a dict with the keys clean,
    onlyecho, force, listonly, queue and verbose; `exprs` is the list of
    compiled regular expressions given as positional arguments.  Exits
    with the usage text on a bad option and exits 0 after printing it
    for -h.
    """
    try:
        found, args = getopt.getopt(argv, 'cefhlq:v')
    except getopt.GetoptError:
        sys.exit(usage)

    opts = {
        'clean': False,
        'onlyecho': False,
        'force': False,
        'listonly': False,
        'queue': '',
        'verbose': False,
    }
    for o, a in found:
        if o == '-c':
            opts['clean'] = True
        if o == '-e':
            opts['onlyecho'] = True
        if o == '-f':
            opts['force'] = True
        if o == '-h':
            print(usage)
            sys.exit(0)
        if o == '-l':
            opts['listonly'] = True
        if o == '-q':
            opts['queue'] = a
        if o == '-v':
            opts['verbose'] = True

    exprs = [ re.compile(arg) for arg in args ]
    return opts, exprs


def sync_links():
    """Copy each symlinked entry of ./Link over the same-named file in
    ./Base when that file is missing or differs."""
    print('Checking for outdated files in Link directory')
    for entry in listdir('Link'):
        link = joinpath('Link', entry)
        if not islink(link):
            continue

        base = joinpath('Base', entry)
        if not isfile(base) or not filecmp(link, base):
            print('%s is different than source %s...copying' % (base, link))
            copyfile(link, base)


def select_jobs(jobnames, exprs):
    """Return the names in `jobnames` matched (re.match) by at least one
    of `exprs`, in their original order; all names when `exprs` is
    empty."""
    if not exprs:
        return list(jobnames)
    return [ jobname for jobname in jobnames
             if any(expr.match(jobname) for expr in exprs) ]


def runnable_jobs(joblist, force, clean, cleandir):
    """Prepare job directories and return the jobs that should be sent.

    A job whose directory does not exist gets one created.  For an
    existing directory, unless `force` is set, the job is skipped when
    it has a .success marker or a .start marker without a .stop marker.
    Any other existing directory must be emptied with `cleandir`, which
    only happens when `clean` is set; otherwise the script exits.
    """
    ready = []
    for jobname in joblist:
        if os.path.exists(jobname):
            if not force:
                if os.path.isfile(joinpath(jobname, '.success')):
                    continue

                if os.path.isfile(joinpath(jobname, '.start')) and \
                        not os.path.isfile(joinpath(jobname, '.stop')):
                    continue

            # NOTE(review): nesting reconstructed from a flattened
            # source; this check is assumed to apply with or without
            # -f -- confirm against the original file.
            if not clean:
                sys.exit('job directory not clean!')

            cleandir(jobname)
        else:
            os.mkdir(jobname)
        ready.append(jobname)
    return ready


def main(argv=None):
    """List or submit the jobs described by Base/test.py."""
    if argv is None:
        argv = sys.argv[1:]
    opts, exprs = parse_args(argv)
    onlyecho = opts['onlyecho']

    if not opts['listonly'] and not onlyecho and isdir('Link'):
        sync_links()

    # Project-local modules, imported at the same point of the run as
    # before (after the Link directory has been synchronized).
    import job, jobfile, pbs

    test = jobfile.JobFile(joinpath('Base', 'test.py'))
    joblist = select_jobs(test.jobs, exprs)

    if opts['listonly']:
        for jobname in joblist:
            if opts['verbose']:
                test.printinfo(jobname)
            else:
                print(jobname)
        sys.exit(0)

    if not onlyecho:
        joblist = runnable_jobs(joblist, opts['force'], opts['clean'],
                                job.cleandir)

    rootdir = re.sub(r'^/\.automount/', r'/n/', os.getcwd())
    for jobname in joblist:
        jobdir = joinpath(rootdir, jobname)

        if not onlyecho and not os.path.isdir(jobdir):
            sys.exit('%s is not a directory. Cannot build job' % jobdir)

        sys.stderr.write('Job name: %s\n' % jobname)
        sys.stderr.write('Job directory: %s\n' % jobdir)

        qsub = pbs.qsub()
        qsub.pbshost = 'simpool.eecs.umich.edu'
        qsub.stdout = joinpath(jobdir, 'jobout')
        qsub.name = jobname
        qsub.join = True
        qsub.node_type = 'FAST'
        qsub.onlyecho = onlyecho
        qsub.env['ROOTDIR'] = rootdir
        qsub.verbose = opts['verbose']
        if opts['queue']:
            qsub.queue = opts['queue']

        qsub.do(joinpath('Base', 'job.py'))
        sys.stderr.write('\n')


if __name__ == '__main__':
    main()