More fixes to the pbs stuff to make it more robust.
author Nathan Binkert <binkertn@umich.edu>
Thu, 10 Feb 2005 04:55:21 +0000 (23:55 -0500)
committer Nathan Binkert <binkertn@umich.edu>
Thu, 10 Feb 2005 04:55:21 +0000 (23:55 -0500)
sim/pyconfig/SConscript:
    Embed the jobfile.py script into the binary so that we don't
    need to copy it into the Base directory every time.
test/genini.py:
    Add the util/pbs directory to the path so we can get to
    jobfile.py
    Add a -I argument that adds the given directory to the path.
util/pbs/pbs.py:
    Create a MyPOpen class.  This is a lot like the popen2.Popen3 class
    in the Python library, except that my version allows standard in and
    standard out to be redirected to a file instead of a pipe.
    Use this popen class to execute qsub or ssh qsub.  This was important
    for the ssh version of qsub because we need to pipe the script into
    standard in of ssh so that the script can get to the qsub command.
    (Otherwise we have a problem discovering the path.)
util/pbs/send.py:
    Tweak the script so it figures out paths in NFS correctly.
    Use the new system for running qsub.

--HG--
extra : convert_revision : 1289915ba99cec6fd464b71215c32d2197ff2824

sim/pyconfig/SConscript
test/genini.py
util/pbs/pbs.py
util/pbs/send.py

index 5708ac9a86a0010ccda0b7d3cdc92cb7af090bfa..9154d3b994f2b32d911fe804163b70228a76aefc 100644 (file)
@@ -170,7 +170,7 @@ EmbedMap %(name)s("%(fname)s",
 /* namespace */ }
 '''
 
-embedded_py_files = ['m5config.py']
+embedded_py_files = ['m5config.py', '../../util/pbs/jobfile.py']
 objpath = os.path.join(env['SRCDIR'], 'objects')
 for root, dirs, files in os.walk(objpath, topdown=True):
     for i,dir in enumerate(dirs):
index 0dac0d4092805955c9a34877799d21072fc7b154..73e7012f622c5e2d1a4d013631d2f3c8a36a737c 100644 (file)
@@ -31,23 +31,26 @@ from os.path import join as joinpath, realpath
 mypath = sys.path[0]
 sys.path.append(joinpath(mypath, '..'))
 sys.path.append(joinpath(mypath, '../configs/kernel'))
+sys.path.append(joinpath(mypath, '../util/pbs'))
 sys.path.append(joinpath(mypath, '../sim/pyconfig'))
 
 from importer import mpy_exec, mpy_execfile, AddToPath
 from m5config import *
 
 try:
-    opts, args = getopt.getopt(sys.argv[1:], '-E:')
-    for o,a in opts:
-        if o == '-E':
-            offset = a.find('=')
+    opts, args = getopt.getopt(sys.argv[1:], '-E:I:')
+    for opt,arg in opts:
+        if opt == '-E':
+            offset = arg.find('=')
             if offset == -1:
-                name = a
+                name = arg
                 value = True
             else:
-                name = a[:offset]
-                value = a[offset+1:]
+                name = arg[:offset]
+                value = arg[offset+1:]
             env[name] = value
+        if opt == '-I':
+            AddToPath(arg)
 except getopt.GetoptError:
     sys.exit('Improper Usage')
 
index a71dbbf8e827cdd444854d2964b927cead2184c3..cd55da7229bc4fc4fb16a66f54765444c261c9b5 100755 (executable)
 #
 # Authors: Nathan Binkert
 
-import os, re, sys
+import os, popen2, re, sys
 
-def ssh(host, script, tty = False, user = ''):
-    args = [ 'ssh', '-x' ]
-    if user:
-        args.append('-l' + user)
-    if tty:
-        args.append('-t')
-    args.append(host)
-    args.append(script)
+class MyPOpen(object):
+    def __init__(self, cmd, input = None, output = None, bufsize = -1):
+        self.sts = -1
 
-    return os.spawnvp(os.P_WAIT, args[0], args)
+        if input is None:
+            p2c_read, p2c_write = os.pipe()
+            self.tochild = os.fdopen(p2c_write, 'w', bufsize)
+        else:
+            p2c_write = None
+            if isinstance(input, file):
+                p2c_read = input.fileno()
+            elif isinstance(input, str):
+                input = file(input, 'r')
+                p2c_read = input.fileno()
+            elif isinstance(input, int):
+                p2c_read = input
+            else:
+                raise AttributeError
+
+        if output is None:
+            c2p_read, c2p_write = os.pipe()
+            self.fromchild = os.fdopen(c2p_read, 'r', bufsize)
+        else:
+            c2p_read = None
+            if isinstance(output, file):
+                c2p_write = output.fileno()
+            elif isinstance(output, str):
+                output = file(output, 'w')
+                c2p_write = output.fileno()
+            elif isinstance(output, int):
+                c2p_write = output
+            else:
+                raise AttributeError
+
+        self.pid = os.fork()
+        if self.pid == 0:
+            os.dup2(p2c_read, 0)
+            os.dup2(c2p_write, 1)
+            os.dup2(c2p_write, 2)
+            if isinstance(cmd, basestring):
+                cmd = ['/bin/sh', '-c', cmd]
+            if False:
+                for i in range(3, MAXFD):
+                    try:
+                        os.close(i)
+                    except OSError:
+                        pass
+            try:
+                os.execvp(cmd[0], cmd)
+            finally:
+                os._exit(1)
+
+        os.close(p2c_read)
+        os.close(c2p_write)
+
+    def poll(self):
+        if self.sts < 0:
+            pid, sts = os.waitpid(self.pid, os.WNOHANG)
+            if pid == self.pid:
+                self.sts = sts
+        return self.sts
+
+    def wait(self):
+        if self.sts < 0:
+            pid, sts = os.waitpid(self.pid, 0)
+            if pid == self.pid:
+                self.sts = sts
+        return self.sts
 
 class qsub:
     def __init__(self):
@@ -56,37 +114,35 @@ class qsub:
         self.pbshost = ''
         self.qsub = 'qsub'
         self.env = {}
-        self.onlyecho = False
-        self.verbose = False
 
-    def do(self, script, ):
-        args = [self.qsub]
+    def build(self, script, args = []):
+        self.cmd = [ self.qsub ]
 
         if self.env:
             arg = '-v'
             arg += ','.join([ '%s=%s' % i for i in self.env.iteritems() ])
-            args.append(arg)
+            self.cmd.append(arg)
 
         if self.hold:
-            args.append('-h')
+            self.cmd.append('-h')
 
         if len(self.stdout):
-            args.append('-olocalhost:' + self.stdout)
+            self.cmd.append('-olocalhost:' + self.stdout)
 
         if self.keep_stdout and self.keep_stderr:
-            args.append('-koe')
+            self.cmd.append('-koe')
         elif self.keep_stdout:
-            args.append('-ko')
+            self.cmd.append('-ko')
         elif self.keep_stderr:
-            args.append('-ke')
+            self.cmd.append('-ke')
         else:
-            args.append('-kn')
+            self.cmd.append('-kn')
 
         if self.join:
-            args.append('-joe')
+            self.cmd.append('-joe')
 
         if len(self.node_type):
-            args.append('-lnodes=' + self.node_type)
+            self.cmd.append('-lnodes=' + self.node_type)
 
         if self.mail_abort or self.mail_begin or self.mail_end:
             flags = ''
@@ -97,30 +153,32 @@ class qsub:
             if self.mail_end:
                 flags.append('e')
             if len(flags):
-                args.append('-m ' + flags)
+                self.cmd.append('-m ' + flags)
 
         if len(self.name):
-            args.append("-N%s" % self.name)
+            self.cmd.append("-N%s" % self.name)
 
         if self.priority != 0:
-            args.append('-p' + self.priority)
+            self.cmd.append('-p' + self.priority)
 
         if len(self.queue):
-            args.append('-q' + self.queue)
-
-        args.append(script)
-
-        if self.verbose or self.onlyecho:
-            print >>sys.stderr, 'PBS Command:   ', ' '.join(args)
-
-        if self.onlyecho:
-            return 0
-
-        print >>sys.stderr, 'PBS Jobid:      ',
-
-        ec = os.spawnvp(os.P_WAIT, args[0], args)
-
-        if ec != 0 and len(self.pbshost):
-            ec = ssh(self.pbshost, ' '.join(args))
+            self.cmd.append('-q' + self.queue)
+
+        self.cmd.extend(args)
+        self.script = script
+        self.command = ' '.join(self.cmd + [ self.script ])
+
+    def do(self):
+        pbs = MyPOpen(self.cmd + [ self.script ])
+        self.result = pbs.fromchild.read()
+        ec = pbs.wait()
+
+        if ec != 0 and self.pbshost:
+            cmd = ' '.join(self.cmd + [ '-' ])
+            cmd = [ 'ssh', '-x', self.pbshost, cmd ]
+            self.command = ' '.join(cmd)
+            ssh = MyPOpen(cmd, input = self.script)
+            self.result = ssh.fromchild.read()
+            ec = ssh.wait()
 
         return ec
index 3741b869666b40225733dbe8f5f65ed1e73461cd..4daf15b454acc2ea7e536f5342a148db84f2d6e0 100755 (executable)
 # Authors: Ali Saidi
 #          Nathan Binkert
 
-import os, os.path, re, sys
+import os, os.path, re, socket, sys
 from os import environ as env, listdir
 from os.path import basename, isdir, isfile, islink, join as joinpath
 from filecmp import cmp as filecmp
 from shutil import copyfile
 
+def nfspath(dir):
+    if dir.startswith('/.automount/'):
+        dir = '/n/%s' % dir[12:]
+    elif not dir.startswith('/n/'):
+        dir = '/n/%s%s' % (socket.gethostname().split('.')[0], dir)
+    return dir
+
+progpath = nfspath(sys.path[0])
 progname = basename(sys.argv[0])
 usage = """\
 Usage:
@@ -65,7 +73,7 @@ force = False
 listonly = False
 queue = ''
 verbose = False
-rootdir = re.sub(r'^/\.automount/', r'/n/', os.getcwd())
+rootdir = nfspath(os.getcwd())
 for opt,arg in opts:
     if opt == '-c':
         clean = True
@@ -92,7 +100,8 @@ for arg in args:
     exprs.append(re.compile(arg))
 
 if not listonly and not onlyecho and isdir(linkdir):
-    print 'Checking for outdated files in Link directory'
+    if verbose:
+        print 'Checking for outdated files in Link directory'
     entries = listdir(linkdir)
     for entry in entries:
         link = joinpath(linkdir, entry)
@@ -156,8 +165,8 @@ for jobname in joblist:
     if not onlyecho and not os.path.isdir(jobdir):
         sys.exit('%s is not a directory.  Cannot build job' % jobdir)
 
-    print >>sys.stderr, 'Job name:       %s' % jobname
-    print >>sys.stderr, 'Job directory:  %s' % jobdir
+    print 'Job name:       %s' % jobname
+    print 'Job directory:  %s' % jobdir
 
     qsub = pbs.qsub()
     qsub.pbshost = 'simpool.eecs.umich.edu'
@@ -165,11 +174,17 @@ for jobname in joblist:
     qsub.name = jobname
     qsub.join = True
     qsub.node_type = 'FAST'
-    qsub.onlyecho = onlyecho
     qsub.env['ROOTDIR'] = rootdir
-    qsub.verbose = verbose
     if len(queue):
         qsub.queue = queue
+    qsub.build(joinpath(progpath, 'job.py'))
 
-    qsub.do(joinpath(basedir, 'job.py'))
-    print >>sys.stderr, ''
+    if verbose:
+        print 'PBS Command:    %s' % qsub.command
+
+    if not onlyecho:
+        ec = qsub.do()
+        if ec == 0:
+            print 'PBS Jobid:      %s' % qsub.result
+        else:
+            print 'PBS Failed'