+++ /dev/null
-# Copyright (c) 2006 The Regents of The University of Michigan
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are
-# met: redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer;
-# redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution;
-# neither the name of the copyright holders nor the names of its
-# contributors may be used to endorse or promote products derived from
-# this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-import os, popen2, re, sys
-
-class MyPOpen(object):
- def __init__(self, cmd, input = None, output = None, bufsize = -1):
- self.status = -1
-
- if input is None:
- p2c_read, p2c_write = os.pipe()
- self.tochild = os.fdopen(p2c_write, 'w', bufsize)
- else:
- p2c_write = None
- if isinstance(input, file):
- p2c_read = input.fileno()
- elif isinstance(input, str):
- input = file(input, 'r')
- p2c_read = input.fileno()
- elif isinstance(input, int):
- p2c_read = input
- else:
- raise AttributeError
-
- if output is None:
- c2p_read, c2p_write = os.pipe()
- self.fromchild = os.fdopen(c2p_read, 'r', bufsize)
- else:
- c2p_read = None
- if isinstance(output, file):
- c2p_write = output.fileno()
- elif isinstance(output, str):
- output = file(output, 'w')
- c2p_write = output.fileno()
- elif isinstance(output, int):
- c2p_write = output
- else:
- raise AttributeError
-
- self.pid = os.fork()
- if self.pid == 0:
- os.dup2(p2c_read, sys.stdin.fileno())
- os.dup2(c2p_write, sys.stdout.fileno())
- os.dup2(c2p_write, sys.stderr.fileno())
- try:
- os.execvp(cmd[0], cmd)
- finally:
- os._exit(1)
-
- os.close(p2c_read)
- os.close(c2p_write)
-
- def poll(self):
- if self.status < 0:
- pid, status = os.waitpid(self.pid, os.WNOHANG)
- if pid == self.pid:
- self.status = status
- return self.status
-
- def wait(self):
- if self.status < 0:
- pid, status = os.waitpid(self.pid, 0)
- if pid == self.pid:
- self.status = status
- return self.status
-
-
-class oarsub:
- def __init__(self):
- self.walltime = None
- self.queue = None
- self.properties = None
-
- # OAR 2.0 parameters only!
- self.name = None
- self.afterok = None
- self.notify = None
- self.stderr = None
- self.stdout = None
-
-
- self.oarhost = None
- self.oarsub = 'oarsub'
-
- self.jobid = re.compile('IdJob = (\S+)')
- #self.outfile = open("jobnames.dat", "a+")
-
- def build(self, script, args = []):
- self.cmd = [ self.oarsub ]
-
- print "args:", args
- print "script:", script
- if self.properties:
- self.cmd.append('-p"%s"' % self.properties )
-
- if self.queue:
- self.cmd.append('-q "%s"' % self.queue)
-
- if self.walltime:
- self.cmd.append('-l walltime=%s' % self.walltime)
-
- if script[0] != "/":
- self.script = os.getcwd()
- else:
- self.script = script
-
- self.cmd.extend(args)
- self.cmd.append(self.script)
- #cmd = [ 'ssh', '-x', self.oarhost, '"cd %s; %s"' % (os.getcwd(), self.command) ]
- self.command = ' '.join(self.cmd)
-
- print "command: [%s]" % self.command
-
- def do(self):
- oar = MyPOpen(self.cmd)
- self.result = oar.fromchild.read()
- ec = oar.wait()
-
- if ec != 0 and self.oarhost:
- pstdin, pstdout = os.popen4(self.command)
- self.result = pstdout.read()
-
- jobid = self.jobid.match(self.result)
- if jobid == None:
- print "Couldn't get jobid from [%s]" % self.result
- sys.exit(1)
- else:
- #self.outfile.write("%d %s\n" %(int(jobid.group(1)), self.name));
- #self.outfile.flush()
- self.result = jobid.group(1)
-
- return 0
-
-class qsub:
- def __init__(self):
- self.afterok = None
- self.hold = False
- self.join = False
- self.keep_stdout = False
- self.keep_stderr = False
- self.node_type = None
- self.mail_abort = False
- self.mail_begin = False
- self.mail_end = False
- self.name = None
- self.stdout = None
- self.priority = None
- self.queue = None
- self.pbshost = None
- self.qsub = 'qsub'
- self.env = {}
-
- def build(self, script, args = []):
- self.cmd = [ self.qsub ]
-
- if self.env:
- arg = '-v'
- arg += ','.join([ '%s=%s' % i for i in self.env.iteritems() ])
- self.cmd.append(arg)
-
- if self.hold:
- self.cmd.append('-h')
-
- if self.stdout:
- self.cmd.append('-olocalhost:' + self.stdout)
-
- if self.keep_stdout and self.keep_stderr:
- self.cmd.append('-koe')
- elif self.keep_stdout:
- self.cmd.append('-ko')
- elif self.keep_stderr:
- self.cmd.append('-ke')
- else:
- self.cmd.append('-kn')
-
- if self.join:
- self.cmd.append('-joe')
-
- if self.node_type:
- self.cmd.append('-lnodes=' + self.node_type)
-
- if self.mail_abort or self.mail_begin or self.mail_end:
- flags = ''
- if self.mail_abort:
- flags.append('a')
- if self.mail_begin:
- flags.append('b')
- if self.mail_end:
- flags.append('e')
- if len(flags):
- self.cmd.append('-m ' + flags)
- else:
- self.cmd.append('-mn')
-
- if self.name:
- self.cmd.append("-N%s" % self.name)
-
- if self.priority:
- self.cmd.append('-p' + self.priority)
-
- if self.queue:
- self.cmd.append('-q' + self.queue)
-
- if self.afterok:
- self.cmd.append('-Wdepend=afterok:%s' % self.afterok)
-
- self.cmd.extend(args)
- self.script = script
- self.command = ' '.join(self.cmd + [ self.script ])
-
- def do(self):
- pbs = MyPOpen(self.cmd + [ self.script ])
- self.result = pbs.fromchild.read()
- ec = pbs.wait()
-
- if ec != 0 and self.pbshost:
- cmd = ' '.join(self.cmd + [ '-' ])
- cmd = [ 'ssh', '-x', self.pbshost, cmd ]
- self.command = ' '.join(cmd)
- ssh = MyPOpen(cmd, input = self.script)
- self.result = ssh.fromchild.read()
- ec = ssh.wait()
-
- return ec
+++ /dev/null
-#!/usr/bin/env python2.7
-# Copyright (c) 2006 The Regents of The University of Michigan
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are
-# met: redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer;
-# redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution;
-# neither the name of the copyright holders nor the names of its
-# contributors may be used to endorse or promote products derived from
-# this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-import os, os.path, shutil, signal, socket, sys
-from os import environ as env
-from os.path import join as joinpath, expanduser
-
-def date():
- import time
- return time.strftime('%a %b %e %H:%M:%S %Z %Y', time.localtime())
-
-def cleandir(dir):
- for root, dirs, files in os.walk(dir, False):
- for name in files:
- os.remove(joinpath(root, name))
- for name in dirs:
- os.rmdir(joinpath(root, name))
-
-class rsync:
- def __init__(self):
- self.sudo = False
- self.rsync = 'rsync'
- self.compress = False
- self.archive = True
- self.delete = False
- self.options = ''
-
- def do(self, src, dst):
- args = []
- if self.sudo:
- args.append('sudo')
-
- args.append(self.rsync)
- if (self.archive):
- args.append('-a')
- if (self.compress):
- args.append('-z')
- if (self.delete):
- args.append('--delete')
- if len(self.options):
- args.append(self.options)
- args.append(src)
- args.append(dst)
-
- return os.spawnvp(os.P_WAIT, args[0], args)
-
-class JobDir(object):
- def __init__(self, dir):
- self.dir = dir
-
- def file(self, filename):
- return joinpath(self.dir, filename)
-
- def create(self):
- if os.path.exists(self.dir):
- if not os.path.isdir(self.dir):
- sys.exit('%s is not a directory. Cannot build job' % self.dir)
- else:
- os.mkdir(self.dir)
-
- def exists(self):
- return os.path.isdir(self.dir)
-
- def clean(self):
- cleandir(self.dir)
-
- def hasfile(self, filename):
- return os.path.isfile(self.file(filename))
-
- def echofile(self, filename, string):
- filename = self.file(filename)
- try:
- f = file(filename, 'w')
- print >>f, string
- f.flush()
- f.close()
- except IOError,e:
- sys.exit(e)
-
- def rmfile(self, filename):
- filename = self.file(filename)
- if os.path.isfile(filename):
- os.unlink(filename)
-
- def readval(self, filename):
- filename = self.file(filename)
- f = file(filename, 'r')
- value = f.readline().strip()
- f.close()
- return value
-
- def setstatus(self, string):
- filename = self.file('.status')
- try:
- f = file(filename, 'a')
- print >>f, string
- f.flush()
- f.close()
- except IOError,e:
- sys.exit(e)
-
- def getstatus(self):
- filename = self.file('.status')
- try:
- f = file(filename, 'r')
- except IOError, e:
- return 'none'
-
- # fast forward to the end
- for line in f: pass
-
- # the first word on the last line is the status
- return line.split(' ')[0]
-
- def __str__(self):
- return self.dir
-
-if __name__ == '__main__':
- import platform
- binaries = { 'i686' : 'm5.i386',
- 'x86_64' : 'm5.amd64' }
- binary = binaries[platform.machine()]
-
- cwd = os.getcwd()
- rootdir = env.setdefault('ROOTDIR', os.path.dirname(cwd))
- oar_jobid = int(env['OAR_JOBID'])
- oar_jobname = os.path.basename(cwd)
- #pbs_jobname = env['PBS_JOBNAME']
- basedir = joinpath(rootdir, 'Base')
- jobname = env.setdefault('JOBNAME', oar_jobname)
- jobfile = env.setdefault('JOBFILE', joinpath(rootdir, 'Test.py'))
- outdir = env.setdefault('OUTPUT_DIR', cwd)
- env['POOLJOB'] = 'True'
-
- if os.path.isdir("/work"):
- workbase = "/work"
- else:
- workbase = "/tmp/"
-
- workdir = joinpath(workbase, '%s.%s' % (env['USER'], oar_jobid))
- host = socket.gethostname()
-
- os.umask(0022)
-
- jobdir = JobDir(outdir)
-
- started = date()
- jobdir.echofile('.running', started)
- jobdir.rmfile('.queued')
- jobdir.echofile('.host', host)
-
- jobdir.setstatus('running on %s on %s' % (host, started))
-
- if os.path.isdir(workdir):
- cleandir(workdir)
- else:
- os.mkdir(workdir)
-
- if False and os.path.isdir('/z/dist'):
- sync = rsync()
- sync.delete = True
- sync.sudo = True
- sync.do('poolfs::dist/m5/', '/z/dist/m5/')
-
- try:
- os.chdir(workdir)
- except OSError,e:
- sys.exit(e)
-
- os.symlink(jobdir.file('output'), 'status.out')
-
- args = [ joinpath(basedir, binary), joinpath(basedir, 'run.py') ]
- if not len(args):
- sys.exit("no arguments")
-
- print 'starting job... %s' % started
- print ' '.join(args)
- print
- sys.stdout.flush()
-
- childpid = os.fork()
- if not childpid:
- # Execute command
- sys.stdin.close()
- fd = os.open(jobdir.file("output"),
- os.O_WRONLY | os.O_CREAT | os.O_TRUNC)
- os.dup2(fd, sys.stdout.fileno())
- os.dup2(fd, sys.stderr.fileno())
- os.execvp(args[0], args)
-
- def handler(signum, frame):
- if childpid != 0:
- os.kill(childpid, signum)
-
- signal.signal(signal.SIGHUP, handler)
- signal.signal(signal.SIGINT, handler)
- signal.signal(signal.SIGQUIT, handler)
- signal.signal(signal.SIGTERM, handler)
- signal.signal(signal.SIGCONT, handler)
- signal.signal(signal.SIGUSR1, handler)
- signal.signal(signal.SIGUSR2, handler)
-
- done = 0
- while not done:
- try:
- thepid,ec = os.waitpid(childpid, 0)
- if ec:
- print 'Exit code ', ec
- status = 'failure'
- else:
- status = 'success'
- done = 1
- except OSError:
- pass
-
- complete = date()
- print '\njob complete... %s' % complete
- jobdir.echofile('.%s' % status, complete)
- jobdir.rmfile('.running')
- jobdir.setstatus('%s on %s' % (status, complete))
+++ /dev/null
-#!/usr/bin/env python2.7
-# Copyright (c) 2006 The Regents of The University of Michigan
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are
-# met: redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer;
-# redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution;
-# neither the name of the copyright holders nor the names of its
-# contributors may be used to endorse or promote products derived from
-# this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-import os, os.path, re, socket, sys
-from os import environ as env, listdir
-from os.path import basename, isdir, isfile, islink, join as joinpath, normpath
-from filecmp import cmp as filecmp
-from shutil import copy
-
-def nfspath(dir):
- if dir.startswith('/.automount/'):
- dir = '/n/%s' % dir[12:]
- elif not dir.startswith('/n/'):
- dir = '/n/%s%s' % (socket.gethostname().split('.')[0], dir)
- return dir
-
-def syncdir(srcdir, destdir):
- srcdir = normpath(srcdir)
- destdir = normpath(destdir)
- if not isdir(destdir):
- sys.exit('destination directory "%s" does not exist' % destdir)
-
- for root, dirs, files in os.walk(srcdir):
- root = normpath(root)
- prefix = os.path.commonprefix([root, srcdir])
- root = root[len(prefix):]
- if root.startswith('/'):
- root = root[1:]
- for rem in [ d for d in dirs if d.startswith('.') or d == 'SCCS']:
- dirs.remove(rem)
-
- for entry in dirs:
- newdir = joinpath(destdir, root, entry)
- if not isdir(newdir):
- os.mkdir(newdir)
- print 'mkdir', newdir
-
- for i,d in enumerate(dirs):
- if islink(joinpath(srcdir, root, d)):
- dirs[i] = joinpath(d, '.')
-
- for entry in files:
- dest = normpath(joinpath(destdir, root, entry))
- src = normpath(joinpath(srcdir, root, entry))
- if not isfile(dest) or not filecmp(src, dest):
- print 'copy %s %s' % (dest, src)
- copy(src, dest)
-
-progpath = nfspath(sys.path[0])
-progname = basename(sys.argv[0])
-usage = """\
-Usage:
- %(progname)s [-c] [-e] [-f] [-j <jobfile>] [-q queue] [-v] <regexp>
- -c clean directory if job can be run
- -C submit the checkpointing runs
- -d Make jobs be dependent on the completion of the checkpoint runs
- -e only echo pbs command info, don't actually send the job
- -f force the job to run regardless of state
- -q <queue> submit job to the named queue
- -j <jobfile> specify the jobfile (default is <rootdir>/Test.py)
- -v be verbose
-
- %(progname)s [-j <jobfile>] -l [-v] <regexp>
- -j <jobfile> specify the jobfile (default is <rootdir>/Test.py)
- -l list job names, don't submit
- -v be verbose (list job parameters)
-
- %(progname)s -h
- -h display this help
-""" % locals()
-
-try:
- import getopt
- opts, args = getopt.getopt(sys.argv[1:], '-Ccdefhj:lnq:Rt:v')
-except getopt.GetoptError:
- sys.exit(usage)
-
-depend = False
-clean = False
-onlyecho = False
-exprs = []
-force = False
-listonly = False
-queue = ''
-verbose = False
-jfile = 'Test.py'
-docpts = False
-doruns = True
-runflag = False
-node_type = 'FAST'
-update = True
-
-for opt,arg in opts:
- if opt == '-C':
- docpts = True
- if opt == '-c':
- clean = True
- if opt == '-d':
- depend = True
- if opt == '-e':
- onlyecho = True
- if opt == '-f':
- force = True
- if opt == '-h':
- print usage
- sys.exit(0)
- if opt == '-j':
- jfile = arg
- if opt == '-l':
- listonly = True
- if opt == '-n':
- update = False
- if opt == '-q':
- queue = arg
- if opt == '-R':
- runflag = True
- if opt == '-t':
- node_type = arg
- if opt == '-v':
- verbose = True
-
-if docpts:
- doruns = runflag
-
-for arg in args:
- exprs.append(re.compile(arg))
-
-import jobfile, batch
-from job import JobDir, date
-
-conf = jobfile.JobFile(jfile)
-
-if update and not listonly and not onlyecho and isdir(conf.linkdir):
- if verbose:
- print 'Checking for outdated files in Link directory'
- if not isdir(conf.basedir):
- os.mkdir(conf.basedir)
- syncdir(conf.linkdir, conf.basedir)
-
-jobnames = {}
-joblist = []
-
-if docpts and doruns:
- gen = conf.alljobs()
-elif docpts:
- gen = conf.checkpoints()
-elif doruns:
- gen = conf.jobs()
-
-for job in gen:
- if job.name in jobnames:
- continue
-
- if exprs:
- for expr in exprs:
- if expr.match(job.name):
- joblist.append(job)
- break
- else:
- joblist.append(job)
-
-if listonly:
- if verbose:
- for job in joblist:
- job.printinfo()
- else:
- for job in joblist:
- print job.name
- sys.exit(0)
-
-if not onlyecho:
- newlist = []
- for job in joblist:
- jobdir = JobDir(joinpath(conf.rootdir, job.name))
- if jobdir.exists():
- if not force:
- status = jobdir.getstatus()
- if status == 'queued':
- continue
-
- if status == 'running':
- continue
-
- if status == 'success':
- continue
-
- if not clean:
- sys.exit('job directory %s not clean!' % jobdir)
-
- jobdir.clean()
- newlist.append(job)
- joblist = newlist
-
-class NameHack(object):
- def __init__(self, host='pbs.pool', port=24465):
- self.host = host
- self.port = port
- self.socket = None
-
- def setname(self, jobid, jobname):
- try:
- jobid = int(jobid)
- except ValueError:
- jobid = int(jobid.strip().split('.')[0])
-
- jobname = jobname.strip()
- # since pbs can handle jobnames of 15 characters or less,
- # don't use the raj hack.
- if len(jobname) <= 15:
- return
-
- if self.socket is None:
- import socket
- self.socket = socket.socket()
- # Connect to pbs.pool and send the jobid/jobname pair to port
- # 24465 (Raj didn't realize that there are only 64k ports and
- # setup inetd to point to port 90001)
- self.socket.connect((self.host, self.port))
-
- self.socket.send("%s %s\n" % (jobid, jobname))
-
-namehack = NameHack()
-
-rootdir = conf.rootdir
-script = joinpath(rootdir, 'Base', 'job.py')
-
-for job in joblist:
- jobdir = JobDir(joinpath(rootdir, job.name))
- if depend:
- cptdir = JobDir(joinpath(rootdir, job.checkpoint.name))
- path = str(cptdir)
- if not isdir(path) or not isfile(joinpath(path, '.success')):
- continue
-
- cptjob = cptdir.readval('.batch_jobid')
-
- if not onlyecho:
- jobdir.create()
- os.chdir(str(jobdir))
- os.environ['PWD'] = str(jobdir)
-
- print 'Job name: %s' % job.name
- print 'Job directory: %s' % jobdir
-
-
- qsub = batch.oarsub()
- qsub.oarhost = 'poolfs.eecs.umich.edu'
- #qsub.stdout = jobdir.file('jobout')
- qsub.name = job.name
- qsub.walltime = '50'
- #qsub.join = True
- #qsub.node_type = node_type
- #qsub.env['ROOTDIR'] = conf.rootdir
- #qsub.env['JOBNAME'] = job.name
- #if depend:
- # qsub.afterok = cptjob
- #if queue:
- # qsub.queue = queue
- qsub.properties = "64bit = 'Yes' or 64bit = 'No'"
- qsub.build(script)
-
- if verbose:
- print 'cwd: %s' % qsub.command
- print 'PBS Command: %s' % qsub.command
-
- if not onlyecho:
- ec = qsub.do()
- if ec == 0:
- jobid = qsub.result
- print 'OAR Jobid: %s' % jobid
- #namehack.setname(jobid, job.name)
- queued = date()
- jobdir.echofile('.batch_jobid', jobid)
- jobdir.echofile('.batch_jobname', job.name)
- jobdir.echofile('.queued', queued)
- jobdir.setstatus('queued on %s' % queued)
- else:
- print 'OAR Failed'
- print
- print
+++ /dev/null
-#!/usr/bin/env python2.7
-# Copyright (c) 2005 The Regents of The University of Michigan
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are
-# met: redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer;
-# redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution;
-# neither the name of the copyright holders nor the names of its
-# contributors may be used to endorse or promote products derived from
-# this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-import os, os.path, shutil, signal, socket, sys
-from os import environ as env
-from os.path import join as joinpath, expanduser
-
-def date():
- import time
- return time.strftime('%a %b %e %H:%M:%S %Z %Y', time.localtime())
-
-def cleandir(dir):
- for root, dirs, files in os.walk(dir, False):
- for name in files:
- os.remove(joinpath(root, name))
- for name in dirs:
- os.rmdir(joinpath(root, name))
-
-class rsync:
- def __init__(self):
- self.sudo = False
- self.rsync = 'rsync'
- self.compress = False
- self.archive = True
- self.delete = False
- self.options = ''
-
- def do(self, src, dst):
- args = []
- if self.sudo:
- args.append('sudo')
-
- args.append(self.rsync)
- if (self.archive):
- args.append('-a')
- if (self.compress):
- args.append('-z')
- if (self.delete):
- args.append('--delete')
- if len(self.options):
- args.append(self.options)
- args.append(src)
- args.append(dst)
-
- return os.spawnvp(os.P_WAIT, args[0], args)
-
-class JobDir(object):
- def __init__(self, dir):
- self.dir = dir
-
- def file(self, filename):
- return joinpath(self.dir, filename)
-
- def create(self):
- if os.path.exists(self.dir):
- if not os.path.isdir(self.dir):
- sys.exit('%s is not a directory. Cannot build job' % self.dir)
- else:
- os.mkdir(self.dir)
-
- def exists(self):
- return os.path.isdir(self.dir)
-
- def clean(self):
- cleandir(self.dir)
-
- def hasfile(self, filename):
- return os.path.isfile(self.file(filename))
-
- def echofile(self, filename, string):
- filename = self.file(filename)
- try:
- f = file(filename, 'w')
- print >>f, string
- f.flush()
- f.close()
- except IOError,e:
- sys.exit(e)
-
- def rmfile(self, filename):
- filename = self.file(filename)
- if os.path.isfile(filename):
- os.unlink(filename)
-
- def readval(self, filename):
- filename = self.file(filename)
- f = file(filename, 'r')
- value = f.readline().strip()
- f.close()
- return value
-
- def setstatus(self, string):
- filename = self.file('.status')
- try:
- f = file(filename, 'a')
- print >>f, string
- f.flush()
- f.close()
- except IOError,e:
- sys.exit(e)
-
- def getstatus(self):
- filename = self.file('.status')
- try:
- f = file(filename, 'r')
- except IOError, e:
- return 'none'
-
- # fast forward to the end
- for line in f: pass
-
- # the first word on the last line is the status
- return line.split(' ')[0]
-
- def __str__(self):
- return self.dir
-
-if __name__ == '__main__':
- rootdir = env.setdefault('ROOTDIR', os.getcwd())
- pbs_jobid = env['PBS_JOBID']
- pbs_jobname = env['PBS_JOBNAME']
- basedir = joinpath(rootdir, 'Base')
- jobname = env.setdefault('JOBNAME', pbs_jobname)
- jobfile = env.setdefault('JOBFILE', joinpath(rootdir, 'Test.py'))
- outdir = env.setdefault('OUTPUT_DIR', joinpath(rootdir, jobname))
- env['POOLJOB'] = 'True'
-
- if os.path.isdir("/work"):
- workbase = "/work"
- else:
- workbase = "/tmp/"
-
- workdir = joinpath(workbase, '%s.%s' % (env['USER'], pbs_jobid))
- host = socket.gethostname()
-
- os.umask(0022)
-
- jobdir = JobDir(outdir)
-
- started = date()
- jobdir.echofile('.running', started)
- jobdir.rmfile('.queued')
- jobdir.echofile('.host', host)
-
- jobdir.setstatus('running on %s on %s' % (host, started))
-
- if os.path.isdir(workdir):
- cleandir(workdir)
- else:
- os.mkdir(workdir)
-
- if False and os.path.isdir('/z/dist'):
- sync = rsync()
- sync.delete = True
- sync.sudo = True
- sync.do('poolfs::dist/m5/', '/z/dist/m5/')
-
- try:
- os.chdir(workdir)
- except OSError,e:
- sys.exit(e)
-
- os.symlink(jobdir.file('output'), 'status.out')
-
- args = [ joinpath(basedir, 'm5'), joinpath(basedir, 'run.py') ]
- if not len(args):
- sys.exit("no arguments")
-
- print 'starting job... %s' % started
- print ' '.join(args)
- print
- sys.stdout.flush()
-
- childpid = os.fork()
- if not childpid:
- # Execute command
- sys.stdin.close()
- fd = os.open(jobdir.file("output"),
- os.O_WRONLY | os.O_CREAT | os.O_TRUNC)
- os.dup2(fd, sys.stdout.fileno())
- os.dup2(fd, sys.stderr.fileno())
- os.execvp(args[0], args)
-
- def handler(signum, frame):
- if childpid != 0:
- os.kill(childpid, signum)
-
- signal.signal(signal.SIGHUP, handler)
- signal.signal(signal.SIGINT, handler)
- signal.signal(signal.SIGQUIT, handler)
- signal.signal(signal.SIGTERM, handler)
- signal.signal(signal.SIGCONT, handler)
- signal.signal(signal.SIGUSR1, handler)
- signal.signal(signal.SIGUSR2, handler)
-
- done = 0
- while not done:
- try:
- thepid,ec = os.waitpid(childpid, 0)
- if ec:
- print 'Exit code ', ec
- status = 'failure'
- else:
- status = 'success'
- done = 1
- except OSError:
- pass
-
- complete = date()
- print '\njob complete... %s' % complete
- jobdir.echofile('.%s' % status, complete)
- jobdir.rmfile('.running')
- jobdir.setstatus('%s on %s' % (status, complete))
+++ /dev/null
-# Copyright (c) 2005 The Regents of The University of Michigan
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are
-# met: redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer;
-# redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution;
-# neither the name of the copyright holders nor the names of its
-# contributors may be used to endorse or promote products derived from
-# this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-import os, popen2, re, sys
-
-class MyPOpen(object):
- def __init__(self, cmd, input = None, output = None, bufsize = -1):
- self.status = -1
-
- if input is None:
- p2c_read, p2c_write = os.pipe()
- self.tochild = os.fdopen(p2c_write, 'w', bufsize)
- else:
- p2c_write = None
- if isinstance(input, file):
- p2c_read = input.fileno()
- elif isinstance(input, str):
- input = file(input, 'r')
- p2c_read = input.fileno()
- elif isinstance(input, int):
- p2c_read = input
- else:
- raise AttributeError
-
- if output is None:
- c2p_read, c2p_write = os.pipe()
- self.fromchild = os.fdopen(c2p_read, 'r', bufsize)
- else:
- c2p_read = None
- if isinstance(output, file):
- c2p_write = output.fileno()
- elif isinstance(output, str):
- output = file(output, 'w')
- c2p_write = output.fileno()
- elif isinstance(output, int):
- c2p_write = output
- else:
- raise AttributeError
-
- self.pid = os.fork()
- if self.pid == 0:
- os.dup2(p2c_read, sys.stdin.fileno())
- os.dup2(c2p_write, sys.stdout.fileno())
- os.dup2(c2p_write, sys.stderr.fileno())
- try:
- os.execvp(cmd[0], cmd)
- finally:
- os._exit(1)
-
- os.close(p2c_read)
- os.close(c2p_write)
-
- def poll(self):
- if self.status < 0:
- pid, status = os.waitpid(self.pid, os.WNOHANG)
- if pid == self.pid:
- self.status = status
- return self.status
-
- def wait(self):
- if self.status < 0:
- pid, status = os.waitpid(self.pid, 0)
- if pid == self.pid:
- self.status = status
- return self.status
-
-class qsub:
- def __init__(self):
- self.afterok = None
- self.hold = False
- self.join = False
- self.keep_stdout = False
- self.keep_stderr = False
- self.node_type = None
- self.mail_abort = False
- self.mail_begin = False
- self.mail_end = False
- self.name = None
- self.stdout = None
- self.priority = None
- self.queue = None
- self.pbshost = None
- self.qsub = 'qsub'
- self.env = {}
-
- def build(self, script, args = []):
- self.cmd = [ self.qsub ]
-
- if self.env:
- arg = '-v'
- arg += ','.join([ '%s=%s' % i for i in self.env.iteritems() ])
- self.cmd.append(arg)
-
- if self.hold:
- self.cmd.append('-h')
-
- if self.stdout:
- self.cmd.append('-olocalhost:' + self.stdout)
-
- if self.keep_stdout and self.keep_stderr:
- self.cmd.append('-koe')
- elif self.keep_stdout:
- self.cmd.append('-ko')
- elif self.keep_stderr:
- self.cmd.append('-ke')
- else:
- self.cmd.append('-kn')
-
- if self.join:
- self.cmd.append('-joe')
-
- if self.node_type:
- self.cmd.append('-lnodes=' + self.node_type)
-
- if self.mail_abort or self.mail_begin or self.mail_end:
- flags = ''
- if self.mail_abort:
- flags.append('a')
- if self.mail_begin:
- flags.append('b')
- if self.mail_end:
- flags.append('e')
- if len(flags):
- self.cmd.append('-m ' + flags)
- else:
- self.cmd.append('-mn')
-
- if self.name:
- self.cmd.append("-N%s" % self.name)
-
- if self.priority:
- self.cmd.append('-p' + self.priority)
-
- if self.queue:
- self.cmd.append('-q' + self.queue)
-
- if self.afterok:
- self.cmd.append('-Wdepend=afterok:%s' % self.afterok)
-
- self.cmd.extend(args)
- self.script = script
- self.command = ' '.join(self.cmd + [ self.script ])
-
- def do(self):
- pbs = MyPOpen(self.cmd + [ self.script ])
- self.result = pbs.fromchild.read()
- ec = pbs.wait()
-
- if ec != 0 and self.pbshost:
- cmd = ' '.join(self.cmd + [ '-' ])
- cmd = [ 'ssh', '-x', self.pbshost, cmd ]
- self.command = ' '.join(cmd)
- ssh = MyPOpen(cmd, input = self.script)
- self.result = ssh.fromchild.read()
- ec = ssh.wait()
-
- return ec
+++ /dev/null
-#!/usr/bin/env python2.7
-# Copyright (c) 2005 The Regents of The University of Michigan
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are
-# met: redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer;
-# redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution;
-# neither the name of the copyright holders nor the names of its
-# contributors may be used to endorse or promote products derived from
-# this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-import os, os.path, re, socket, sys
-from os import environ as env, listdir
-from os.path import basename, isdir, isfile, islink, join as joinpath, normpath
-from filecmp import cmp as filecmp
-from shutil import copy
-
-def nfspath(dir):
- if dir.startswith('/.automount/'):
- dir = '/n/%s' % dir[12:]
- elif not dir.startswith('/n/'):
- dir = '/n/%s%s' % (socket.gethostname().split('.')[0], dir)
- return dir
-
-def syncdir(srcdir, destdir):
- srcdir = normpath(srcdir)
- destdir = normpath(destdir)
- if not isdir(destdir):
- sys.exit('destination directory "%s" does not exist' % destdir)
-
- for root, dirs, files in os.walk(srcdir):
- root = normpath(root)
- prefix = os.path.commonprefix([root, srcdir])
- root = root[len(prefix):]
- if root.startswith('/'):
- root = root[1:]
- for rem in [ d for d in dirs if d.startswith('.') or d == 'SCCS']:
- dirs.remove(rem)
-
- for entry in dirs:
- newdir = joinpath(destdir, root, entry)
- if not isdir(newdir):
- os.mkdir(newdir)
- print 'mkdir', newdir
-
- for i,d in enumerate(dirs):
- if islink(joinpath(srcdir, root, d)):
- dirs[i] = joinpath(d, '.')
-
- for entry in files:
- dest = normpath(joinpath(destdir, root, entry))
- src = normpath(joinpath(srcdir, root, entry))
- if not isfile(dest) or not filecmp(src, dest):
- print 'copy %s %s' % (dest, src)
- copy(src, dest)
-
-progpath = nfspath(sys.path[0])
-progname = basename(sys.argv[0])
-usage = """\
-Usage:
- %(progname)s [-c] [-e] [-f] [-j <jobfile>] [-q queue] [-v] <regexp>
- -c clean directory if job can be run
- -C submit the checkpointing runs
- -d Make jobs be dependent on the completion of the checkpoint runs
- -e only echo pbs command info, don't actually send the job
- -f force the job to run regardless of state
- -q <queue> submit job to the named queue
- -j <jobfile> specify the jobfile (default is <rootdir>/Test.py)
- -v be verbose
-
- %(progname)s [-j <jobfile>] -l [-v] <regexp>
- -j <jobfile> specify the jobfile (default is <rootdir>/Test.py)
- -l list job names, don't submit
- -v be verbose (list job parameters)
-
- %(progname)s -h
- -h display this help
-""" % locals()
-
-try:
- import getopt
- opts, args = getopt.getopt(sys.argv[1:], '-Ccdefhj:lnq:Rt:v')
-except getopt.GetoptError:
- sys.exit(usage)
-
-depend = False
-clean = False
-onlyecho = False
-exprs = []
-force = False
-listonly = False
-queue = ''
-verbose = False
-jfile = 'Test.py'
-docpts = False
-doruns = True
-runflag = False
-node_type = 'FAST'
-update = True
-
-for opt,arg in opts:
- if opt == '-C':
- docpts = True
- if opt == '-c':
- clean = True
- if opt == '-d':
- depend = True
- if opt == '-e':
- onlyecho = True
- if opt == '-f':
- force = True
- if opt == '-h':
- print usage
- sys.exit(0)
- if opt == '-j':
- jfile = arg
- if opt == '-l':
- listonly = True
- if opt == '-n':
- update = False
- if opt == '-q':
- queue = arg
- if opt == '-R':
- runflag = True
- if opt == '-t':
- node_type = arg
- if opt == '-v':
- verbose = True
-
-if docpts:
- doruns = runflag
-
-for arg in args:
- exprs.append(re.compile(arg))
-
-import jobfile, pbs
-from job import JobDir, date
-
-conf = jobfile.JobFile(jfile)
-
-if update and not listonly and not onlyecho and isdir(conf.linkdir):
- if verbose:
- print 'Checking for outdated files in Link directory'
- if not isdir(conf.basedir):
- os.mkdir(conf.basedir)
- syncdir(conf.linkdir, conf.basedir)
-
-jobnames = {}
-joblist = []
-
-if docpts and doruns:
- gen = conf.alljobs()
-elif docpts:
- gen = conf.checkpoints()
-elif doruns:
- gen = conf.jobs()
-
-for job in gen:
- if job.name in jobnames:
- continue
-
- if exprs:
- for expr in exprs:
- if expr.match(job.name):
- joblist.append(job)
- break
- else:
- joblist.append(job)
-
-if listonly:
- if verbose:
- for job in joblist:
- job.printinfo()
- else:
- for job in joblist:
- print job.name
- sys.exit(0)
-
-if not onlyecho:
- newlist = []
- for job in joblist:
- jobdir = JobDir(joinpath(conf.rootdir, job.name))
- if jobdir.exists():
- if not force:
- status = jobdir.getstatus()
- if status == 'queued':
- continue
-
- if status == 'running':
- continue
-
- if status == 'success':
- continue
-
- if not clean:
- sys.exit('job directory %s not clean!' % jobdir)
-
- jobdir.clean()
- newlist.append(job)
- joblist = newlist
-
-class NameHack(object):
- def __init__(self, host='pbs.pool', port=24465):
- self.host = host
- self.port = port
- self.socket = None
-
- def setname(self, jobid, jobname):
- try:
- jobid = int(jobid)
- except ValueError:
- jobid = int(jobid.strip().split('.')[0])
-
- jobname = jobname.strip()
- # since pbs can handle jobnames of 15 characters or less,
- # don't use the raj hack.
- if len(jobname) <= 15:
- return
-
- if self.socket is None:
- import socket
- self.socket = socket.socket()
- # Connect to pbs.pool and send the jobid/jobname pair to port
- # 24465 (Raj didn't realize that there are only 64k ports and
- # setup inetd to point to port 90001)
- self.socket.connect((self.host, self.port))
-
- self.socket.send("%s %s\n" % (jobid, jobname))
-
-namehack = NameHack()
-
-for job in joblist:
- jobdir = JobDir(joinpath(conf.rootdir, job.name))
- if depend:
- cptdir = JobDir(joinpath(conf.rootdir, job.checkpoint.name))
- cptjob = cptdir.readval('.pbs_jobid')
-
- if not onlyecho:
- jobdir.create()
-
- print 'Job name: %s' % job.name
- print 'Job directory: %s' % jobdir
-
- qsub = pbs.qsub()
- qsub.pbshost = 'simpool.eecs.umich.edu'
- qsub.stdout = jobdir.file('jobout')
- qsub.name = job.name[:15]
- qsub.join = True
- qsub.node_type = node_type
- qsub.env['ROOTDIR'] = conf.rootdir
- qsub.env['JOBNAME'] = job.name
- if depend:
- qsub.afterok = cptjob
- if queue:
- qsub.queue = queue
- qsub.build(joinpath(progpath, 'job.py'))
-
- if verbose:
- print 'PBS Command: %s' % qsub.command
-
- if not onlyecho:
- ec = qsub.do()
- if ec == 0:
- jobid = qsub.result
- print 'PBS Jobid: %s' % jobid
- namehack.setname(jobid, job.name)
- queued = date()
- jobdir.echofile('.pbs_jobid', jobid)
- jobdir.echofile('.pbs_jobname', job.name)
- jobdir.echofile('.queued', queued)
- jobdir.setstatus('queued on %s' % queued)
- else:
- print 'PBS Failed'
+++ /dev/null
-#! /usr/bin/env python2.7
-
-# Copyright (c) 2004-2005, 2007 The Regents of The University of Michigan
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are
-# met: redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer;
-# redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution;
-# neither the name of the copyright holders nor the names of its
-# contributors may be used to endorse or promote products derived from
-# this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-# Important!
-# This script expects a simple $ prompt, if you are using a shell other than
-# sh which defaults to this you'll need to add something like the following
-# to your bashrc/bash_profile script:
-#if [ "$OAR_USER" = "xxxx" ]; then
-# PS1='$ '
-
-
-import sys
-import os
-import re
-import time
-import optparse
-
-import pexpect
-
-progname = os.path.basename(sys.argv[0])
-
-usage = "%prog [options] command [command arguments]"
-optparser = optparse.OptionParser(usage=usage)
-optparser.allow_interspersed_args=False
-optparser.add_option('-e', dest='stderr_file',
- help='command stderr output file')
-optparser.add_option('-o', dest='stdout_file',
- help='command stdout output file')
-optparser.add_option('-l', dest='save_log', action='store_true',
- help='save oarsub output log file')
-optparser.add_option('-N', dest='job_name',
- help='oarsub job name')
-optparser.add_option('-q', dest='dest_queue',
- help='oarsub destination queue')
-optparser.add_option('--qwait', dest='oarsub_timeout', type='int',
- help='oarsub queue wait timeout', default=30*60)
-optparser.add_option('-t', dest='cmd_timeout', type='int',
- help='command execution timeout', default=600*60)
-
-(options, cmd) = optparser.parse_args()
-
-if cmd == []:
- print >>sys.stderr, "%s: missing command" % progname
- sys.exit(1)
-
-# If we want to do this, need to add check here to make sure cmd[0] is
-# a valid PBS job name, else oarsub will die on us.
-#
-#if not options.job_name:
-# options.job_name = cmd[0]
-
-cwd = os.getcwd()
-
-# Deal with systems where /n is a symlink to /.automount
-if cwd.startswith('/.automount/'):
- cwd = cwd.replace('/.automount/', '/n/', 1)
-
-if not cwd.startswith('/n/poolfs/'):
- print >>sys.stderr, "Error: current directory must be under /n/poolfs."
- sys.exit(1)
-
-# The Shell class wraps pexpect.spawn with some handy functions that
-# assume the thing on the other end is a Bourne/bash shell.
-class Shell(pexpect.spawn):
- # Regexp to match the shell prompt. We change the prompt to
- # something fixed and distinctive to make it easier to match
- # reliably.
- prompt_re = re.compile('qdo\$ ')
-
- def __init__(self, cmd):
- # initialize base pexpect.spawn object
- try:
- pexpect.spawn.__init__(self, cmd)
- except pexpect.ExceptionPexpect, exc:
- print "%s:" % progname, exc
- sys.exit(1)
- # full_output accumulates the full output of the session
- self.full_output = ""
- self.quick_timeout = 15
- # wait for a prompt, then change it
- try:
- self.expect('\$ ', options.oarsub_timeout)
- except pexpect.TIMEOUT:
- print >>sys.stderr, "%s: oarsub timed out." % progname
- self.kill(9)
- self.safe_close()
- sys.exit(1)
- self.do_command('unset PROMPT_COMMAND; PS1="qdo$ "')
-
- # version of expect that updates full_output too
- def expect(self, regexp, timeout = -1):
- pexpect.spawn.expect(self, regexp, timeout)
- self.full_output += self.before + self.after
-
- # Just issue a command and wait for the next prompt.
- # Returns a string containing the output of the command.
- def do_bare_command(self, cmd, timeout = -1):
- global full_output
- self.sendline(cmd)
- # read back the echo of the command
- self.readline()
- # wait for the next prompt
- self.expect(self.prompt_re, timeout)
- output = self.before.rstrip()
- return output
-
- # Issue a command, then query its exit status.
- # Returns a (string, int) tuple with the command output and the status.
- def do_command(self, cmd, timeout = -1):
- # do the command itself
- output = self.do_bare_command(cmd, timeout)
- # collect status
- status = int(self.do_bare_command("echo $?", self.quick_timeout))
- return (output, status)
-
- # Check to see if the given directory exists.
- def dir_exists(self, dirname):
- (output, status) = shell.do_command('[ -d %s ]' % dirname,
- self.quick_timeout)
- return status == 0
-
- # Don't actually try to close it.. just wait until it closes by itself
- # We can't actually kill the pid which is what it's trying to do, and if
- # we call wait we could be in an unfortunate situation of it printing input
- # right as we call wait, so the input is never read and the process never ends
- def safe_close(self):
- count = 0
- while self.isalive() and count < 10:
- time.sleep(1)
- self.close(force=False)
-
-# Spawn the interactive pool job.
-
-# Hack to do link on poolfs... disabled for now since
-# compiler/linker/library versioning problems between poolfs and
-# nodes. May never work since poolfs is x86-64 and nodes are 32-bit.
-if False and len(cmd) > 50:
- shell_cmd = 'ssh -t poolfs /bin/sh -l'
- print "%s: running %s on poolfs" % (progname, cmd[0])
-else:
- shell_cmd = 'oarsub -I'
- if options.job_name:
- shell_cmd += ' -n "%s"' % options.job_name
- if options.dest_queue:
- shell_cmd += ' -q ' + options.dest_queue
- shell_cmd += ' -d %s' % cwd
-
-shell = Shell(shell_cmd)
-
-try:
- # chdir to cwd
- (output, status) = shell.do_command('cd ' + cwd)
-
- if status != 0:
- raise OSError, "Can't chdir to %s" % cwd
-
- # wacky hack: sometimes scons will create an output directory then
- # fork a job to generate files in that directory, and the job will
- # get run before the directory creation propagates through NFS.
- # This hack looks for a '-o' option indicating an output file and
- # waits for the corresponding directory to appear if necessary.
- try:
- if 'cc' in cmd[0] or 'g++' in cmd[0]:
- output_dir = os.path.dirname(cmd[cmd.index('-o')+1])
- elif 'm5' in cmd[0]:
- output_dir = cmd[cmd.index('-d')+1]
- else:
- output_dir = None
- except (ValueError, IndexError):
- # no big deal if there's no '-o'/'-d' or if it's the final argument
- output_dir = None
-
- if output_dir:
- secs_waited = 0
- while not shell.dir_exists(output_dir) and secs_waited < 90:
- time.sleep(5)
- secs_waited += 5
- if secs_waited > 30:
- print "waited", secs_waited, "seconds for", output_dir
-
- # run command
- if options.stdout_file:
- cmd += ['>', options.stdout_file]
- if options.stderr_file:
- cmd += ['2>', options.stderr_file]
- try:
- (output, status) = shell.do_command(' '.join(cmd), options.cmd_timeout)
- except pexpect.TIMEOUT:
- print >>sys.stderr, "%s: command timed out after %d seconds." \
- % (progname, options.cmd_timeout)
- shell.sendline('~.') # oarsub/ssh termination escape sequence
- shell.safe_close()
- status = 3
- if output:
- print output
-finally:
- # end job
- if shell.isalive():
- shell.sendline('exit')
- shell.expect('Disconnected from OAR job .*')
- shell.safe_close()
-
- # if there was an error, log the output even if not requested
- if status != 0 or options.save_log:
- log = file('qdo-log.' + str(os.getpid()), 'w')
- log.write(shell.full_output)
- log.close()
-del shell
-
-sys.exit(status)