From 7deb34b24dfdd7033d9e5e28717263bf9f94ec31 Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Sat, 4 Jul 2020 16:47:41 -0700 Subject: [PATCH] util: Delete util/batch, util/pbs, and util qdo. The util/pbs directory has a set of python scripts which were written to submit jobs to the PBS pool at the University of Michigan. They aren't incredibly specialized for that environment, but they do have a little bit of hard coding which, for instance, uses paths which are only meaningful there. The util/batch directory was added alongside a seemingly unrelated change (perhaps by accident?) and is a slightly updated copy of util/pbs which also (or instead?) supports OAR. The qdo script seems to be a script for managing job queues on PBS and/or OAR, and is also tuned to the UofM environment, for instance insisting that a path starts with /n/poolfs so that files are available on an NFS volume shared with the pool. All three of these scripts could potentially be useful with modification in a similar environment, but also all three are unmaintained. The environment in UofM may no longer actually match the expectations of these scripts, and even if it does/did, gem5 may no longer be 100% compatible with them. If these scripts sit in util not being used by anyone, they add clutter and complexity without adding any value. If someone really needs to know what was once in them, they can be recovered from revision control. Change-Id: I0192bd119893f7a41fcb820f4cf408609b03cd27 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/30957 Reviewed-by: Steve Reinhardt Reviewed-by: Andreas Sandberg Reviewed-by: Jason Lowe-Power Maintainer: Jason Lowe-Power Tested-by: kokoro --- util/batch/batch.py | 247 ----------------------------------- util/batch/job.py | 244 ----------------------------------- util/batch/send.py | 304 -------------------------------------------- util/pbs/job.py | 237 ---------------------------------- util/pbs/pbs.py | 180 -------------------------- util/pbs/send.py | 289 ----------------------------------------- util/qdo | 235 ---------------------------------- 7 files changed, 1736 deletions(-) delete mode 100644 util/batch/batch.py delete mode 100755 util/batch/job.py delete mode 100755 util/batch/send.py delete mode 100755 util/pbs/job.py delete mode 100755 util/pbs/pbs.py delete mode 100755 util/pbs/send.py delete mode 100755 util/qdo diff --git a/util/batch/batch.py b/util/batch/batch.py deleted file mode 100644 index fa27542bd..000000000 --- a/util/batch/batch.py +++ /dev/null @@ -1,247 +0,0 @@ -# Copyright (c) 2006 The Regents of The University of Michigan -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer; -# redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution; -# neither the name of the copyright holders nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -import os, popen2, re, sys - -class MyPOpen(object): - def __init__(self, cmd, input = None, output = None, bufsize = -1): - self.status = -1 - - if input is None: - p2c_read, p2c_write = os.pipe() - self.tochild = os.fdopen(p2c_write, 'w', bufsize) - else: - p2c_write = None - if isinstance(input, file): - p2c_read = input.fileno() - elif isinstance(input, str): - input = file(input, 'r') - p2c_read = input.fileno() - elif isinstance(input, int): - p2c_read = input - else: - raise AttributeError - - if output is None: - c2p_read, c2p_write = os.pipe() - self.fromchild = os.fdopen(c2p_read, 'r', bufsize) - else: - c2p_read = None - if isinstance(output, file): - c2p_write = output.fileno() - elif isinstance(output, str): - output = file(output, 'w') - c2p_write = output.fileno() - elif isinstance(output, int): - c2p_write = output - else: - raise AttributeError - - self.pid = os.fork() - if self.pid == 0: - os.dup2(p2c_read, sys.stdin.fileno()) - os.dup2(c2p_write, sys.stdout.fileno()) - os.dup2(c2p_write, sys.stderr.fileno()) - try: - os.execvp(cmd[0], cmd) - finally: - os._exit(1) - - os.close(p2c_read) - os.close(c2p_write) - - def poll(self): - if self.status < 0: - pid, status = os.waitpid(self.pid, os.WNOHANG) - if pid == self.pid: - self.status = status - return self.status - - def wait(self): - if self.status < 0: - pid, status = os.waitpid(self.pid, 0) - if pid == self.pid: - self.status = status - return self.status - - -class oarsub: - def __init__(self): - self.walltime = None - self.queue = None - self.properties = None - - # OAR 2.0 parameters only! - self.name = None - self.afterok = None - self.notify = None - self.stderr = None - self.stdout = None - - - self.oarhost = None - self.oarsub = 'oarsub' - - self.jobid = re.compile('IdJob = (\S+)') - #self.outfile = open("jobnames.dat", "a+") - - def build(self, script, args = []): - self.cmd = [ self.oarsub ] - - print "args:", args - print "script:", script - if self.properties: - self.cmd.append('-p"%s"' % self.properties ) - - if self.queue: - self.cmd.append('-q "%s"' % self.queue) - - if self.walltime: - self.cmd.append('-l walltime=%s' % self.walltime) - - if script[0] != "/": - self.script = os.getcwd() - else: - self.script = script - - self.cmd.extend(args) - self.cmd.append(self.script) - #cmd = [ 'ssh', '-x', self.oarhost, '"cd %s; %s"' % (os.getcwd(), self.command) ] - self.command = ' '.join(self.cmd) - - print "command: [%s]" % self.command - - def do(self): - oar = MyPOpen(self.cmd) - self.result = oar.fromchild.read() - ec = oar.wait() - - if ec != 0 and self.oarhost: - pstdin, pstdout = os.popen4(self.command) - self.result = pstdout.read() - - jobid = self.jobid.match(self.result) - if jobid == None: - print "Couldn't get jobid from [%s]" % self.result - sys.exit(1) - else: - #self.outfile.write("%d %s\n" %(int(jobid.group(1)), self.name)); - #self.outfile.flush() - self.result = jobid.group(1) - - return 0 - -class qsub: - def __init__(self): - self.afterok = None - self.hold = False - self.join = False - self.keep_stdout = False - self.keep_stderr = False - self.node_type = None - self.mail_abort = False - self.mail_begin = False - self.mail_end = False - self.name = None - self.stdout = None - self.priority = None - self.queue = None - self.pbshost = None - self.qsub = 'qsub' - self.env = {} - - def build(self, script, args = []): - self.cmd = [ self.qsub ] - - if self.env: - arg = '-v' - arg += ','.join([ '%s=%s' % i for i in self.env.iteritems() ]) - self.cmd.append(arg) - - if self.hold: - self.cmd.append('-h') - - if self.stdout: - self.cmd.append('-olocalhost:' + self.stdout) - - if self.keep_stdout and self.keep_stderr: - self.cmd.append('-koe') - elif self.keep_stdout: - self.cmd.append('-ko') - elif self.keep_stderr: - self.cmd.append('-ke') - else: - self.cmd.append('-kn') - - if self.join: - self.cmd.append('-joe') - - if self.node_type: - self.cmd.append('-lnodes=' + self.node_type) - - if self.mail_abort or self.mail_begin or self.mail_end: - flags = '' - if self.mail_abort: - flags.append('a') - if self.mail_begin: - flags.append('b') - if self.mail_end: - flags.append('e') - if len(flags): - self.cmd.append('-m ' + flags) - else: - self.cmd.append('-mn') - - if self.name: - self.cmd.append("-N%s" % self.name) - - if self.priority: - self.cmd.append('-p' + self.priority) - - if self.queue: - self.cmd.append('-q' + self.queue) - - if self.afterok: - self.cmd.append('-Wdepend=afterok:%s' % self.afterok) - - self.cmd.extend(args) - self.script = script - self.command = ' '.join(self.cmd + [ self.script ]) - - def do(self): - pbs = MyPOpen(self.cmd + [ self.script ]) - self.result = pbs.fromchild.read() - ec = pbs.wait() - - if ec != 0 and self.pbshost: - cmd = ' '.join(self.cmd + [ '-' ]) - cmd = [ 'ssh', '-x', self.pbshost, cmd ] - self.command = ' '.join(cmd) - ssh = MyPOpen(cmd, input = self.script) - self.result = ssh.fromchild.read() - ec = ssh.wait() - - return ec diff --git a/util/batch/job.py b/util/batch/job.py deleted file mode 100755 index c070e8522..000000000 --- a/util/batch/job.py +++ /dev/null @@ -1,244 +0,0 @@ -#!/usr/bin/env python2.7 -# Copyright (c) 2006 The Regents of The University of Michigan -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer; -# redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution; -# neither the name of the copyright holders nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -import os, os.path, shutil, signal, socket, sys -from os import environ as env -from os.path import join as joinpath, expanduser - -def date(): - import time - return time.strftime('%a %b %e %H:%M:%S %Z %Y', time.localtime()) - -def cleandir(dir): - for root, dirs, files in os.walk(dir, False): - for name in files: - os.remove(joinpath(root, name)) - for name in dirs: - os.rmdir(joinpath(root, name)) - -class rsync: - def __init__(self): - self.sudo = False - self.rsync = 'rsync' - self.compress = False - self.archive = True - self.delete = False - self.options = '' - - def do(self, src, dst): - args = [] - if self.sudo: - args.append('sudo') - - args.append(self.rsync) - if (self.archive): - args.append('-a') - if (self.compress): - args.append('-z') - if (self.delete): - args.append('--delete') - if len(self.options): - args.append(self.options) - args.append(src) - args.append(dst) - - return os.spawnvp(os.P_WAIT, args[0], args) - -class JobDir(object): - def __init__(self, dir): - self.dir = dir - - def file(self, filename): - return joinpath(self.dir, filename) - - def create(self): - if os.path.exists(self.dir): - if not os.path.isdir(self.dir): - sys.exit('%s is not a directory. Cannot build job' % self.dir) - else: - os.mkdir(self.dir) - - def exists(self): - return os.path.isdir(self.dir) - - def clean(self): - cleandir(self.dir) - - def hasfile(self, filename): - return os.path.isfile(self.file(filename)) - - def echofile(self, filename, string): - filename = self.file(filename) - try: - f = file(filename, 'w') - print >>f, string - f.flush() - f.close() - except IOError,e: - sys.exit(e) - - def rmfile(self, filename): - filename = self.file(filename) - if os.path.isfile(filename): - os.unlink(filename) - - def readval(self, filename): - filename = self.file(filename) - f = file(filename, 'r') - value = f.readline().strip() - f.close() - return value - - def setstatus(self, string): - filename = self.file('.status') - try: - f = file(filename, 'a') - print >>f, string - f.flush() - f.close() - except IOError,e: - sys.exit(e) - - def getstatus(self): - filename = self.file('.status') - try: - f = file(filename, 'r') - except IOError, e: - return 'none' - - # fast forward to the end - for line in f: pass - - # the first word on the last line is the status - return line.split(' ')[0] - - def __str__(self): - return self.dir - -if __name__ == '__main__': - import platform - binaries = { 'i686' : 'm5.i386', - 'x86_64' : 'm5.amd64' } - binary = binaries[platform.machine()] - - cwd = os.getcwd() - rootdir = env.setdefault('ROOTDIR', os.path.dirname(cwd)) - oar_jobid = int(env['OAR_JOBID']) - oar_jobname = os.path.basename(cwd) - #pbs_jobname = env['PBS_JOBNAME'] - basedir = joinpath(rootdir, 'Base') - jobname = env.setdefault('JOBNAME', oar_jobname) - jobfile = env.setdefault('JOBFILE', joinpath(rootdir, 'Test.py')) - outdir = env.setdefault('OUTPUT_DIR', cwd) - env['POOLJOB'] = 'True' - - if os.path.isdir("/work"): - workbase = "/work" - else: - workbase = "/tmp/" - - workdir = joinpath(workbase, '%s.%s' % (env['USER'], oar_jobid)) - host = socket.gethostname() - - os.umask(0022) - - jobdir = JobDir(outdir) - - started = date() - jobdir.echofile('.running', started) - jobdir.rmfile('.queued') - jobdir.echofile('.host', host) - - jobdir.setstatus('running on %s on %s' % (host, started)) - - if os.path.isdir(workdir): - cleandir(workdir) - else: - os.mkdir(workdir) - - if False and os.path.isdir('/z/dist'): - sync = rsync() - sync.delete = True - sync.sudo = True - sync.do('poolfs::dist/m5/', '/z/dist/m5/') - - try: - os.chdir(workdir) - except OSError,e: - sys.exit(e) - - os.symlink(jobdir.file('output'), 'status.out') - - args = [ joinpath(basedir, binary), joinpath(basedir, 'run.py') ] - if not len(args): - sys.exit("no arguments") - - print 'starting job... %s' % started - print ' '.join(args) - print - sys.stdout.flush() - - childpid = os.fork() - if not childpid: - # Execute command - sys.stdin.close() - fd = os.open(jobdir.file("output"), - os.O_WRONLY | os.O_CREAT | os.O_TRUNC) - os.dup2(fd, sys.stdout.fileno()) - os.dup2(fd, sys.stderr.fileno()) - os.execvp(args[0], args) - - def handler(signum, frame): - if childpid != 0: - os.kill(childpid, signum) - - signal.signal(signal.SIGHUP, handler) - signal.signal(signal.SIGINT, handler) - signal.signal(signal.SIGQUIT, handler) - signal.signal(signal.SIGTERM, handler) - signal.signal(signal.SIGCONT, handler) - signal.signal(signal.SIGUSR1, handler) - signal.signal(signal.SIGUSR2, handler) - - done = 0 - while not done: - try: - thepid,ec = os.waitpid(childpid, 0) - if ec: - print 'Exit code ', ec - status = 'failure' - else: - status = 'success' - done = 1 - except OSError: - pass - - complete = date() - print '\njob complete... %s' % complete - jobdir.echofile('.%s' % status, complete) - jobdir.rmfile('.running') - jobdir.setstatus('%s on %s' % (status, complete)) diff --git a/util/batch/send.py b/util/batch/send.py deleted file mode 100755 index b4d822bd8..000000000 --- a/util/batch/send.py +++ /dev/null @@ -1,304 +0,0 @@ -#!/usr/bin/env python2.7 -# Copyright (c) 2006 The Regents of The University of Michigan -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer; -# redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution; -# neither the name of the copyright holders nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -import os, os.path, re, socket, sys -from os import environ as env, listdir -from os.path import basename, isdir, isfile, islink, join as joinpath, normpath -from filecmp import cmp as filecmp -from shutil import copy - -def nfspath(dir): - if dir.startswith('/.automount/'): - dir = '/n/%s' % dir[12:] - elif not dir.startswith('/n/'): - dir = '/n/%s%s' % (socket.gethostname().split('.')[0], dir) - return dir - -def syncdir(srcdir, destdir): - srcdir = normpath(srcdir) - destdir = normpath(destdir) - if not isdir(destdir): - sys.exit('destination directory "%s" does not exist' % destdir) - - for root, dirs, files in os.walk(srcdir): - root = normpath(root) - prefix = os.path.commonprefix([root, srcdir]) - root = root[len(prefix):] - if root.startswith('/'): - root = root[1:] - for rem in [ d for d in dirs if d.startswith('.') or d == 'SCCS']: - dirs.remove(rem) - - for entry in dirs: - newdir = joinpath(destdir, root, entry) - if not isdir(newdir): - os.mkdir(newdir) - print 'mkdir', newdir - - for i,d in enumerate(dirs): - if islink(joinpath(srcdir, root, d)): - dirs[i] = joinpath(d, '.') - - for entry in files: - dest = normpath(joinpath(destdir, root, entry)) - src = normpath(joinpath(srcdir, root, entry)) - if not isfile(dest) or not filecmp(src, dest): - print 'copy %s %s' % (dest, src) - copy(src, dest) - -progpath = nfspath(sys.path[0]) -progname = basename(sys.argv[0]) -usage = """\ -Usage: - %(progname)s [-c] [-e] [-f] [-j ] [-q queue] [-v] - -c clean directory if job can be run - -C submit the checkpointing runs - -d Make jobs be dependent on the completion of the checkpoint runs - -e only echo pbs command info, don't actually send the job - -f force the job to run regardless of state - -q submit job to the named queue - -j specify the jobfile (default is /Test.py) - -v be verbose - - %(progname)s [-j ] -l [-v] - -j specify the jobfile (default is /Test.py) - -l list job names, don't submit - -v be verbose (list job parameters) - - %(progname)s -h - -h display this help -""" % locals() - -try: - import getopt - opts, args = getopt.getopt(sys.argv[1:], '-Ccdefhj:lnq:Rt:v') -except getopt.GetoptError: - sys.exit(usage) - -depend = False -clean = False -onlyecho = False -exprs = [] -force = False -listonly = False -queue = '' -verbose = False -jfile = 'Test.py' -docpts = False -doruns = True -runflag = False -node_type = 'FAST' -update = True - -for opt,arg in opts: - if opt == '-C': - docpts = True - if opt == '-c': - clean = True - if opt == '-d': - depend = True - if opt == '-e': - onlyecho = True - if opt == '-f': - force = True - if opt == '-h': - print usage - sys.exit(0) - if opt == '-j': - jfile = arg - if opt == '-l': - listonly = True - if opt == '-n': - update = False - if opt == '-q': - queue = arg - if opt == '-R': - runflag = True - if opt == '-t': - node_type = arg - if opt == '-v': - verbose = True - -if docpts: - doruns = runflag - -for arg in args: - exprs.append(re.compile(arg)) - -import jobfile, batch -from job import JobDir, date - -conf = jobfile.JobFile(jfile) - -if update and not listonly and not onlyecho and isdir(conf.linkdir): - if verbose: - print 'Checking for outdated files in Link directory' - if not isdir(conf.basedir): - os.mkdir(conf.basedir) - syncdir(conf.linkdir, conf.basedir) - -jobnames = {} -joblist = [] - -if docpts and doruns: - gen = conf.alljobs() -elif docpts: - gen = conf.checkpoints() -elif doruns: - gen = conf.jobs() - -for job in gen: - if job.name in jobnames: - continue - - if exprs: - for expr in exprs: - if expr.match(job.name): - joblist.append(job) - break - else: - joblist.append(job) - -if listonly: - if verbose: - for job in joblist: - job.printinfo() - else: - for job in joblist: - print job.name - sys.exit(0) - -if not onlyecho: - newlist = [] - for job in joblist: - jobdir = JobDir(joinpath(conf.rootdir, job.name)) - if jobdir.exists(): - if not force: - status = jobdir.getstatus() - if status == 'queued': - continue - - if status == 'running': - continue - - if status == 'success': - continue - - if not clean: - sys.exit('job directory %s not clean!' % jobdir) - - jobdir.clean() - newlist.append(job) - joblist = newlist - -class NameHack(object): - def __init__(self, host='pbs.pool', port=24465): - self.host = host - self.port = port - self.socket = None - - def setname(self, jobid, jobname): - try: - jobid = int(jobid) - except ValueError: - jobid = int(jobid.strip().split('.')[0]) - - jobname = jobname.strip() - # since pbs can handle jobnames of 15 characters or less, - # don't use the raj hack. - if len(jobname) <= 15: - return - - if self.socket is None: - import socket - self.socket = socket.socket() - # Connect to pbs.pool and send the jobid/jobname pair to port - # 24465 (Raj didn't realize that there are only 64k ports and - # setup inetd to point to port 90001) - self.socket.connect((self.host, self.port)) - - self.socket.send("%s %s\n" % (jobid, jobname)) - -namehack = NameHack() - -rootdir = conf.rootdir -script = joinpath(rootdir, 'Base', 'job.py') - -for job in joblist: - jobdir = JobDir(joinpath(rootdir, job.name)) - if depend: - cptdir = JobDir(joinpath(rootdir, job.checkpoint.name)) - path = str(cptdir) - if not isdir(path) or not isfile(joinpath(path, '.success')): - continue - - cptjob = cptdir.readval('.batch_jobid') - - if not onlyecho: - jobdir.create() - os.chdir(str(jobdir)) - os.environ['PWD'] = str(jobdir) - - print 'Job name: %s' % job.name - print 'Job directory: %s' % jobdir - - - qsub = batch.oarsub() - qsub.oarhost = 'poolfs.eecs.umich.edu' - #qsub.stdout = jobdir.file('jobout') - qsub.name = job.name - qsub.walltime = '50' - #qsub.join = True - #qsub.node_type = node_type - #qsub.env['ROOTDIR'] = conf.rootdir - #qsub.env['JOBNAME'] = job.name - #if depend: - # qsub.afterok = cptjob - #if queue: - # qsub.queue = queue - qsub.properties = "64bit = 'Yes' or 64bit = 'No'" - qsub.build(script) - - if verbose: - print 'cwd: %s' % qsub.command - print 'PBS Command: %s' % qsub.command - - if not onlyecho: - ec = qsub.do() - if ec == 0: - jobid = qsub.result - print 'OAR Jobid: %s' % jobid - #namehack.setname(jobid, job.name) - queued = date() - jobdir.echofile('.batch_jobid', jobid) - jobdir.echofile('.batch_jobname', job.name) - jobdir.echofile('.queued', queued) - jobdir.setstatus('queued on %s' % queued) - else: - print 'OAR Failed' - print - print diff --git a/util/pbs/job.py b/util/pbs/job.py deleted file mode 100755 index aa02a10da..000000000 --- a/util/pbs/job.py +++ /dev/null @@ -1,237 +0,0 @@ -#!/usr/bin/env python2.7 -# Copyright (c) 2005 The Regents of The University of Michigan -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer; -# redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution; -# neither the name of the copyright holders nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -import os, os.path, shutil, signal, socket, sys -from os import environ as env -from os.path import join as joinpath, expanduser - -def date(): - import time - return time.strftime('%a %b %e %H:%M:%S %Z %Y', time.localtime()) - -def cleandir(dir): - for root, dirs, files in os.walk(dir, False): - for name in files: - os.remove(joinpath(root, name)) - for name in dirs: - os.rmdir(joinpath(root, name)) - -class rsync: - def __init__(self): - self.sudo = False - self.rsync = 'rsync' - self.compress = False - self.archive = True - self.delete = False - self.options = '' - - def do(self, src, dst): - args = [] - if self.sudo: - args.append('sudo') - - args.append(self.rsync) - if (self.archive): - args.append('-a') - if (self.compress): - args.append('-z') - if (self.delete): - args.append('--delete') - if len(self.options): - args.append(self.options) - args.append(src) - args.append(dst) - - return os.spawnvp(os.P_WAIT, args[0], args) - -class JobDir(object): - def __init__(self, dir): - self.dir = dir - - def file(self, filename): - return joinpath(self.dir, filename) - - def create(self): - if os.path.exists(self.dir): - if not os.path.isdir(self.dir): - sys.exit('%s is not a directory. Cannot build job' % self.dir) - else: - os.mkdir(self.dir) - - def exists(self): - return os.path.isdir(self.dir) - - def clean(self): - cleandir(self.dir) - - def hasfile(self, filename): - return os.path.isfile(self.file(filename)) - - def echofile(self, filename, string): - filename = self.file(filename) - try: - f = file(filename, 'w') - print >>f, string - f.flush() - f.close() - except IOError,e: - sys.exit(e) - - def rmfile(self, filename): - filename = self.file(filename) - if os.path.isfile(filename): - os.unlink(filename) - - def readval(self, filename): - filename = self.file(filename) - f = file(filename, 'r') - value = f.readline().strip() - f.close() - return value - - def setstatus(self, string): - filename = self.file('.status') - try: - f = file(filename, 'a') - print >>f, string - f.flush() - f.close() - except IOError,e: - sys.exit(e) - - def getstatus(self): - filename = self.file('.status') - try: - f = file(filename, 'r') - except IOError, e: - return 'none' - - # fast forward to the end - for line in f: pass - - # the first word on the last line is the status - return line.split(' ')[0] - - def __str__(self): - return self.dir - -if __name__ == '__main__': - rootdir = env.setdefault('ROOTDIR', os.getcwd()) - pbs_jobid = env['PBS_JOBID'] - pbs_jobname = env['PBS_JOBNAME'] - basedir = joinpath(rootdir, 'Base') - jobname = env.setdefault('JOBNAME', pbs_jobname) - jobfile = env.setdefault('JOBFILE', joinpath(rootdir, 'Test.py')) - outdir = env.setdefault('OUTPUT_DIR', joinpath(rootdir, jobname)) - env['POOLJOB'] = 'True' - - if os.path.isdir("/work"): - workbase = "/work" - else: - workbase = "/tmp/" - - workdir = joinpath(workbase, '%s.%s' % (env['USER'], pbs_jobid)) - host = socket.gethostname() - - os.umask(0022) - - jobdir = JobDir(outdir) - - started = date() - jobdir.echofile('.running', started) - jobdir.rmfile('.queued') - jobdir.echofile('.host', host) - - jobdir.setstatus('running on %s on %s' % (host, started)) - - if os.path.isdir(workdir): - cleandir(workdir) - else: - os.mkdir(workdir) - - if False and os.path.isdir('/z/dist'): - sync = rsync() - sync.delete = True - sync.sudo = True - sync.do('poolfs::dist/m5/', '/z/dist/m5/') - - try: - os.chdir(workdir) - except OSError,e: - sys.exit(e) - - os.symlink(jobdir.file('output'), 'status.out') - - args = [ joinpath(basedir, 'm5'), joinpath(basedir, 'run.py') ] - if not len(args): - sys.exit("no arguments") - - print 'starting job... %s' % started - print ' '.join(args) - print - sys.stdout.flush() - - childpid = os.fork() - if not childpid: - # Execute command - sys.stdin.close() - fd = os.open(jobdir.file("output"), - os.O_WRONLY | os.O_CREAT | os.O_TRUNC) - os.dup2(fd, sys.stdout.fileno()) - os.dup2(fd, sys.stderr.fileno()) - os.execvp(args[0], args) - - def handler(signum, frame): - if childpid != 0: - os.kill(childpid, signum) - - signal.signal(signal.SIGHUP, handler) - signal.signal(signal.SIGINT, handler) - signal.signal(signal.SIGQUIT, handler) - signal.signal(signal.SIGTERM, handler) - signal.signal(signal.SIGCONT, handler) - signal.signal(signal.SIGUSR1, handler) - signal.signal(signal.SIGUSR2, handler) - - done = 0 - while not done: - try: - thepid,ec = os.waitpid(childpid, 0) - if ec: - print 'Exit code ', ec - status = 'failure' - else: - status = 'success' - done = 1 - except OSError: - pass - - complete = date() - print '\njob complete... %s' % complete - jobdir.echofile('.%s' % status, complete) - jobdir.rmfile('.running') - jobdir.setstatus('%s on %s' % (status, complete)) diff --git a/util/pbs/pbs.py b/util/pbs/pbs.py deleted file mode 100755 index b8b5a3337..000000000 --- a/util/pbs/pbs.py +++ /dev/null @@ -1,180 +0,0 @@ -# Copyright (c) 2005 The Regents of The University of Michigan -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer; -# redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution; -# neither the name of the copyright holders nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -import os, popen2, re, sys - -class MyPOpen(object): - def __init__(self, cmd, input = None, output = None, bufsize = -1): - self.status = -1 - - if input is None: - p2c_read, p2c_write = os.pipe() - self.tochild = os.fdopen(p2c_write, 'w', bufsize) - else: - p2c_write = None - if isinstance(input, file): - p2c_read = input.fileno() - elif isinstance(input, str): - input = file(input, 'r') - p2c_read = input.fileno() - elif isinstance(input, int): - p2c_read = input - else: - raise AttributeError - - if output is None: - c2p_read, c2p_write = os.pipe() - self.fromchild = os.fdopen(c2p_read, 'r', bufsize) - else: - c2p_read = None - if isinstance(output, file): - c2p_write = output.fileno() - elif isinstance(output, str): - output = file(output, 'w') - c2p_write = output.fileno() - elif isinstance(output, int): - c2p_write = output - else: - raise AttributeError - - self.pid = os.fork() - if self.pid == 0: - os.dup2(p2c_read, sys.stdin.fileno()) - os.dup2(c2p_write, sys.stdout.fileno()) - os.dup2(c2p_write, sys.stderr.fileno()) - try: - os.execvp(cmd[0], cmd) - finally: - os._exit(1) - - os.close(p2c_read) - os.close(c2p_write) - - def poll(self): - if self.status < 0: - pid, status = os.waitpid(self.pid, os.WNOHANG) - if pid == self.pid: - self.status = status - return self.status - - def wait(self): - if self.status < 0: - pid, status = os.waitpid(self.pid, 0) - if pid == self.pid: - self.status = status - return self.status - -class qsub: - def __init__(self): - self.afterok = None - self.hold = False - self.join = False - self.keep_stdout = False - self.keep_stderr = False - self.node_type = None - self.mail_abort = False - self.mail_begin = False - self.mail_end = False - self.name = None - self.stdout = None - self.priority = None - self.queue = None - self.pbshost = None - self.qsub = 'qsub' - self.env = {} - - def build(self, script, args = []): - self.cmd = [ self.qsub ] - - if self.env: - arg = '-v' - arg += ','.join([ '%s=%s' % i for i in self.env.iteritems() ]) - self.cmd.append(arg) - - if self.hold: - self.cmd.append('-h') - - if self.stdout: - self.cmd.append('-olocalhost:' + self.stdout) - - if self.keep_stdout and self.keep_stderr: - self.cmd.append('-koe') - elif self.keep_stdout: - self.cmd.append('-ko') - elif self.keep_stderr: - self.cmd.append('-ke') - else: - self.cmd.append('-kn') - - if self.join: - self.cmd.append('-joe') - - if self.node_type: - self.cmd.append('-lnodes=' + self.node_type) - - if self.mail_abort or self.mail_begin or self.mail_end: - flags = '' - if self.mail_abort: - flags.append('a') - if self.mail_begin: - flags.append('b') - if self.mail_end: - flags.append('e') - if len(flags): - self.cmd.append('-m ' + flags) - else: - self.cmd.append('-mn') - - if self.name: - self.cmd.append("-N%s" % self.name) - - if self.priority: - self.cmd.append('-p' + self.priority) - - if self.queue: - self.cmd.append('-q' + self.queue) - - if self.afterok: - self.cmd.append('-Wdepend=afterok:%s' % self.afterok) - - self.cmd.extend(args) - self.script = script - self.command = ' '.join(self.cmd + [ self.script ]) - - def do(self): - pbs = MyPOpen(self.cmd + [ self.script ]) - self.result = pbs.fromchild.read() - ec = pbs.wait() - - if ec != 0 and self.pbshost: - cmd = ' '.join(self.cmd + [ '-' ]) - cmd = [ 'ssh', '-x', self.pbshost, cmd ] - self.command = ' '.join(cmd) - ssh = MyPOpen(cmd, input = self.script) - self.result = ssh.fromchild.read() - ec = ssh.wait() - - return ec diff --git a/util/pbs/send.py b/util/pbs/send.py deleted file mode 100755 index dc2f8f986..000000000 --- a/util/pbs/send.py +++ /dev/null @@ -1,289 +0,0 @@ -#!/usr/bin/env python2.7 -# Copyright (c) 2005 The Regents of The University of Michigan -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer; -# redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution; -# neither the name of the copyright holders nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -import os, os.path, re, socket, sys -from os import environ as env, listdir -from os.path import basename, isdir, isfile, islink, join as joinpath, normpath -from filecmp import cmp as filecmp -from shutil import copy - -def nfspath(dir): - if dir.startswith('/.automount/'): - dir = '/n/%s' % dir[12:] - elif not dir.startswith('/n/'): - dir = '/n/%s%s' % (socket.gethostname().split('.')[0], dir) - return dir - -def syncdir(srcdir, destdir): - srcdir = normpath(srcdir) - destdir = normpath(destdir) - if not isdir(destdir): - sys.exit('destination directory "%s" does not exist' % destdir) - - for root, dirs, files in os.walk(srcdir): - root = normpath(root) - prefix = os.path.commonprefix([root, srcdir]) - root = root[len(prefix):] - if root.startswith('/'): - root = root[1:] - for rem in [ d for d in dirs if d.startswith('.') or d == 'SCCS']: - dirs.remove(rem) - - for entry in dirs: - newdir = joinpath(destdir, root, entry) - if not isdir(newdir): - os.mkdir(newdir) - print 'mkdir', newdir - - for i,d in enumerate(dirs): - if islink(joinpath(srcdir, root, d)): - dirs[i] = joinpath(d, '.') - - for entry in files: - dest = normpath(joinpath(destdir, root, entry)) - src = normpath(joinpath(srcdir, root, entry)) - if not isfile(dest) or not filecmp(src, dest): - print 'copy %s %s' % (dest, src) - copy(src, dest) - -progpath = nfspath(sys.path[0]) -progname = basename(sys.argv[0]) -usage = """\ -Usage: - %(progname)s [-c] [-e] [-f] [-j ] [-q queue] [-v] - -c clean directory if job can be run - -C submit the checkpointing runs - -d Make jobs be dependent on the completion of the checkpoint runs - -e only echo pbs command info, don't actually send the job - -f force the job to run regardless of state - -q submit job to the named queue - -j specify the jobfile (default is /Test.py) - -v be verbose - - %(progname)s [-j ] -l [-v] - -j specify the jobfile (default is /Test.py) - -l list job names, don't submit - -v be verbose (list job parameters) - - %(progname)s -h - -h display this help -""" % locals() - -try: - import getopt - opts, args = getopt.getopt(sys.argv[1:], '-Ccdefhj:lnq:Rt:v') -except getopt.GetoptError: - sys.exit(usage) - -depend = False -clean = False -onlyecho = False -exprs = [] -force = False -listonly = False -queue = '' -verbose = False -jfile = 'Test.py' -docpts = False -doruns = True -runflag = False -node_type = 'FAST' -update = True - -for opt,arg in opts: - if opt == '-C': - docpts = True - if opt == '-c': - clean = True - if opt == '-d': - depend = True - if opt == '-e': - onlyecho = True - if opt == '-f': - force = True - if opt == '-h': - print usage - sys.exit(0) - if opt == '-j': - jfile = arg - if opt == '-l': - listonly = True - if opt == '-n': - update = False - if opt == '-q': - queue = arg - if opt == '-R': - runflag = True - if opt == '-t': - node_type = arg - if opt == '-v': - verbose = True - -if docpts: - doruns = runflag - -for arg in args: - exprs.append(re.compile(arg)) - -import jobfile, pbs -from job import JobDir, date - -conf = jobfile.JobFile(jfile) - -if update and not listonly and not onlyecho and isdir(conf.linkdir): - if verbose: - print 'Checking for outdated files in Link directory' - if not isdir(conf.basedir): - os.mkdir(conf.basedir) - syncdir(conf.linkdir, conf.basedir) - -jobnames = {} -joblist = [] - -if docpts and doruns: - gen = conf.alljobs() -elif docpts: - gen = conf.checkpoints() -elif doruns: - gen = conf.jobs() - -for job in gen: - if job.name in jobnames: - continue - - if exprs: - for expr in exprs: - if expr.match(job.name): - joblist.append(job) - break - else: - joblist.append(job) - -if listonly: - if verbose: - for job in joblist: - job.printinfo() - else: - for job in joblist: - print job.name - sys.exit(0) - -if not onlyecho: - newlist = [] - for job in joblist: - jobdir = JobDir(joinpath(conf.rootdir, job.name)) - if jobdir.exists(): - if not force: - status = jobdir.getstatus() - if status == 'queued': - continue - - if status == 'running': - continue - - if status == 'success': - continue - - if not clean: - sys.exit('job directory %s not clean!' % jobdir) - - jobdir.clean() - newlist.append(job) - joblist = newlist - -class NameHack(object): - def __init__(self, host='pbs.pool', port=24465): - self.host = host - self.port = port - self.socket = None - - def setname(self, jobid, jobname): - try: - jobid = int(jobid) - except ValueError: - jobid = int(jobid.strip().split('.')[0]) - - jobname = jobname.strip() - # since pbs can handle jobnames of 15 characters or less, - # don't use the raj hack. - if len(jobname) <= 15: - return - - if self.socket is None: - import socket - self.socket = socket.socket() - # Connect to pbs.pool and send the jobid/jobname pair to port - # 24465 (Raj didn't realize that there are only 64k ports and - # setup inetd to point to port 90001) - self.socket.connect((self.host, self.port)) - - self.socket.send("%s %s\n" % (jobid, jobname)) - -namehack = NameHack() - -for job in joblist: - jobdir = JobDir(joinpath(conf.rootdir, job.name)) - if depend: - cptdir = JobDir(joinpath(conf.rootdir, job.checkpoint.name)) - cptjob = cptdir.readval('.pbs_jobid') - - if not onlyecho: - jobdir.create() - - print 'Job name: %s' % job.name - print 'Job directory: %s' % jobdir - - qsub = pbs.qsub() - qsub.pbshost = 'simpool.eecs.umich.edu' - qsub.stdout = jobdir.file('jobout') - qsub.name = job.name[:15] - qsub.join = True - qsub.node_type = node_type - qsub.env['ROOTDIR'] = conf.rootdir - qsub.env['JOBNAME'] = job.name - if depend: - qsub.afterok = cptjob - if queue: - qsub.queue = queue - qsub.build(joinpath(progpath, 'job.py')) - - if verbose: - print 'PBS Command: %s' % qsub.command - - if not onlyecho: - ec = qsub.do() - if ec == 0: - jobid = qsub.result - print 'PBS Jobid: %s' % jobid - namehack.setname(jobid, job.name) - queued = date() - jobdir.echofile('.pbs_jobid', jobid) - jobdir.echofile('.pbs_jobname', job.name) - jobdir.echofile('.queued', queued) - jobdir.setstatus('queued on %s' % queued) - else: - print 'PBS Failed' diff --git a/util/qdo b/util/qdo deleted file mode 100755 index e2a5f2d80..000000000 --- a/util/qdo +++ /dev/null @@ -1,235 +0,0 @@ -#! /usr/bin/env python2.7 - -# Copyright (c) 2004-2005, 2007 The Regents of The University of Michigan -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer; -# redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution; -# neither the name of the copyright holders nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -# Important! -# This script expects a simple $ prompt, if you are using a shell other than -# sh which defaults to this you'll need to add something like the following -# to your bashrc/bash_profile script: -#if [ "$OAR_USER" = "xxxx" ]; then -# PS1='$ ' - - -import sys -import os -import re -import time -import optparse - -import pexpect - -progname = os.path.basename(sys.argv[0]) - -usage = "%prog [options] command [command arguments]" -optparser = optparse.OptionParser(usage=usage) -optparser.allow_interspersed_args=False -optparser.add_option('-e', dest='stderr_file', - help='command stderr output file') -optparser.add_option('-o', dest='stdout_file', - help='command stdout output file') -optparser.add_option('-l', dest='save_log', action='store_true', - help='save oarsub output log file') -optparser.add_option('-N', dest='job_name', - help='oarsub job name') -optparser.add_option('-q', dest='dest_queue', - help='oarsub destination queue') -optparser.add_option('--qwait', dest='oarsub_timeout', type='int', - help='oarsub queue wait timeout', default=30*60) -optparser.add_option('-t', dest='cmd_timeout', type='int', - help='command execution timeout', default=600*60) - -(options, cmd) = optparser.parse_args() - -if cmd == []: - print >>sys.stderr, "%s: missing command" % progname - sys.exit(1) - -# If we want to do this, need to add check here to make sure cmd[0] is -# a valid PBS job name, else oarsub will die on us. -# -#if not options.job_name: -# options.job_name = cmd[0] - -cwd = os.getcwd() - -# Deal with systems where /n is a symlink to /.automount -if cwd.startswith('/.automount/'): - cwd = cwd.replace('/.automount/', '/n/', 1) - -if not cwd.startswith('/n/poolfs/'): - print >>sys.stderr, "Error: current directory must be under /n/poolfs." - sys.exit(1) - -# The Shell class wraps pexpect.spawn with some handy functions that -# assume the thing on the other end is a Bourne/bash shell. -class Shell(pexpect.spawn): - # Regexp to match the shell prompt. We change the prompt to - # something fixed and distinctive to make it easier to match - # reliably. - prompt_re = re.compile('qdo\$ ') - - def __init__(self, cmd): - # initialize base pexpect.spawn object - try: - pexpect.spawn.__init__(self, cmd) - except pexpect.ExceptionPexpect, exc: - print "%s:" % progname, exc - sys.exit(1) - # full_output accumulates the full output of the session - self.full_output = "" - self.quick_timeout = 15 - # wait for a prompt, then change it - try: - self.expect('\$ ', options.oarsub_timeout) - except pexpect.TIMEOUT: - print >>sys.stderr, "%s: oarsub timed out." % progname - self.kill(9) - self.safe_close() - sys.exit(1) - self.do_command('unset PROMPT_COMMAND; PS1="qdo$ "') - - # version of expect that updates full_output too - def expect(self, regexp, timeout = -1): - pexpect.spawn.expect(self, regexp, timeout) - self.full_output += self.before + self.after - - # Just issue a command and wait for the next prompt. - # Returns a string containing the output of the command. - def do_bare_command(self, cmd, timeout = -1): - global full_output - self.sendline(cmd) - # read back the echo of the command - self.readline() - # wait for the next prompt - self.expect(self.prompt_re, timeout) - output = self.before.rstrip() - return output - - # Issue a command, then query its exit status. - # Returns a (string, int) tuple with the command output and the status. - def do_command(self, cmd, timeout = -1): - # do the command itself - output = self.do_bare_command(cmd, timeout) - # collect status - status = int(self.do_bare_command("echo $?", self.quick_timeout)) - return (output, status) - - # Check to see if the given directory exists. - def dir_exists(self, dirname): - (output, status) = shell.do_command('[ -d %s ]' % dirname, - self.quick_timeout) - return status == 0 - - # Don't actually try to close it.. just wait until it closes by itself - # We can't actually kill the pid which is what it's trying to do, and if - # we call wait we could be in an unfortunate situation of it printing input - # right as we call wait, so the input is never read and the process never ends - def safe_close(self): - count = 0 - while self.isalive() and count < 10: - time.sleep(1) - self.close(force=False) - -# Spawn the interactive pool job. - -# Hack to do link on poolfs... disabled for now since -# compiler/linker/library versioning problems between poolfs and -# nodes. May never work since poolfs is x86-64 and nodes are 32-bit. -if False and len(cmd) > 50: - shell_cmd = 'ssh -t poolfs /bin/sh -l' - print "%s: running %s on poolfs" % (progname, cmd[0]) -else: - shell_cmd = 'oarsub -I' - if options.job_name: - shell_cmd += ' -n "%s"' % options.job_name - if options.dest_queue: - shell_cmd += ' -q ' + options.dest_queue - shell_cmd += ' -d %s' % cwd - -shell = Shell(shell_cmd) - -try: - # chdir to cwd - (output, status) = shell.do_command('cd ' + cwd) - - if status != 0: - raise OSError, "Can't chdir to %s" % cwd - - # wacky hack: sometimes scons will create an output directory then - # fork a job to generate files in that directory, and the job will - # get run before the directory creation propagates through NFS. - # This hack looks for a '-o' option indicating an output file and - # waits for the corresponding directory to appear if necessary. - try: - if 'cc' in cmd[0] or 'g++' in cmd[0]: - output_dir = os.path.dirname(cmd[cmd.index('-o')+1]) - elif 'm5' in cmd[0]: - output_dir = cmd[cmd.index('-d')+1] - else: - output_dir = None - except (ValueError, IndexError): - # no big deal if there's no '-o'/'-d' or if it's the final argument - output_dir = None - - if output_dir: - secs_waited = 0 - while not shell.dir_exists(output_dir) and secs_waited < 90: - time.sleep(5) - secs_waited += 5 - if secs_waited > 30: - print "waited", secs_waited, "seconds for", output_dir - - # run command - if options.stdout_file: - cmd += ['>', options.stdout_file] - if options.stderr_file: - cmd += ['2>', options.stderr_file] - try: - (output, status) = shell.do_command(' '.join(cmd), options.cmd_timeout) - except pexpect.TIMEOUT: - print >>sys.stderr, "%s: command timed out after %d seconds." \ - % (progname, options.cmd_timeout) - shell.sendline('~.') # oarsub/ssh termination escape sequence - shell.safe_close() - status = 3 - if output: - print output -finally: - # end job - if shell.isalive(): - shell.sendline('exit') - shell.expect('Disconnected from OAR job .*') - shell.safe_close() - - # if there was an error, log the output even if not requested - if status != 0 or options.save_log: - log = file('qdo-log.' + str(os.getpid()), 'w') - log.write(shell.full_output) - log.close() -del shell - -sys.exit(status) -- 2.30.2