2 # Copyright (c) 2005 The Regents of The University of Michigan
5 # Redistribution and use in source and binary forms, with or without
6 # modification, are permitted provided that the following conditions are
7 # met: redistributions of source code must retain the above copyright
8 # notice, this list of conditions and the following disclaimer;
9 # redistributions in binary form must reproduce the above copyright
10 # notice, this list of conditions and the following disclaimer in the
11 # documentation and/or other materials provided with the distribution;
12 # neither the name of the copyright holders nor the names of its
13 # contributors may be used to endorse or promote products derived from
14 # this software without specific prior written permission.
16 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 import os
, os
.path
, re
, socket
, sys
32 from os
import environ
as env
, listdir
33 from os
.path
import basename
, isdir
, isfile
, islink
, join
as joinpath
, normpath
34 from filecmp
import cmp as filecmp
35 from shutil
import copy
def nfspath(dir):
    """Return *dir* rewritten into the '/n/...' path namespace.

    Mapping, as established by the visible branches:
      '/.automount/<rest>'  ->  '/n/<rest>'
      '/n/<anything>'       ->  returned unchanged
      anything else         ->  '/n/<short hostname><dir>'

    The short hostname is socket.gethostname() up to its first '.'.

    NOTE(review): judging by the function name, '/n/<host>/...' is
    presumably the site's NFS-visible spelling of a local path -- confirm.
    NOTE(review): the 'def' line and the final return were not present in
    the extracted source; they are restored from the call
    'nfspath(sys.path[0])' and from the body's use of 'dir'.
    """
    automount = '/.automount/'
    if dir.startswith(automount):
        # Drop the automounter prefix (the original sliced a literal 12,
        # which is len('/.automount/')).
        return '/n/%s' % dir[len(automount):]
    if dir.startswith('/n/'):
        return dir
    host = socket.gethostname().split('.')[0]
    return '/n/%s%s' % (host, dir)
def syncdir(srcdir, destdir):
    """Copy new or changed files from the srcdir tree into destdir.

    srcdir is walked with os.walk.  Directories whose name starts with
    '.' or equals 'SCCS' are skipped.  Each remaining sub-directory is
    created below destdir when it is missing, and each file is copied
    when the destination is not a regular file or filecmp.cmp reports a
    difference.  A 'mkdir'/'copy' line is printed for every action taken.

    Arguments:
      srcdir  -- root of the tree to read
      destdir -- root of the tree to update; it must already exist

    Exits through sys.exit() when destdir is not a directory.

    NOTE(review): the extracted source had lost several statements of
    this function.  The ones tagged "restored" below were rebuilt from
    the surrounding code and must be checked against the upstream file.
    """
    srcdir = normpath(srcdir)
    destdir = normpath(destdir)
    if not isdir(destdir):
        sys.exit('destination directory "%s" does not exist' % destdir)

    for root, dirs, files in os.walk(srcdir):
        # Reduce the walk position to a path relative to srcdir so it can
        # be re-rooted under destdir.
        prefix = os.path.commonprefix([root, srcdir])
        root = root[len(prefix):]
        if root.startswith('/'):
            # restored: a leading '/' would make joinpath() below discard
            # destdir/srcdir, so it has to be stripped.
            root = root[1:]

        # Prune hidden and SCCS directories in place; os.walk only honours
        # changes made to the very list object it handed out.
        # (restored: the original loop body over 'rem' was not visible.)
        dirs[:] = [d for d in dirs
                   if not (d.startswith('.') or d == 'SCCS')]

        # restored: loop header, existence test, mkdir and message
        for entry in dirs:
            newdir = joinpath(destdir, root, entry)
            if not isdir(newdir):
                os.mkdir(newdir)
                print('mkdir %s' % newdir)

        # Symlinked directories are renamed to 'name/.' for the walk.
        # NOTE(review): presumably this defeats os.walk's refusal to
        # descend into symlinks, so linked trees are synced too -- confirm.
        for i, d in enumerate(dirs):
            if islink(joinpath(srcdir, root, d)):
                dirs[i] = joinpath(d, '.')

        # restored: loop header
        for entry in files:
            dest = normpath(joinpath(destdir, root, entry))
            src = normpath(joinpath(srcdir, root, entry))
            if not isfile(dest) or not filecmp(src, dest):
                # Message text and argument order kept exactly as in the
                # source (destination first).
                print('copy %s %s' % (dest, src))
                # restored: the copy itself (shutil.copy is imported as
                # 'copy' at the top of the file)
                copy(src, dest)
# Directory this script was started from (sys.path[0]), passed through
# nfspath(); used further down to locate 'job.py' next to this script.
progpath = nfspath(sys.path[0])
# Name the script was invoked as; substituted into the usage text.
progname = basename(sys.argv[0])
80 %(progname)s [-c] [-e] [-f] [-j <jobfile>] [-q queue] [-v] <regexp>
81 -c clean directory if job can be run
82 -C submit the checkpointing runs
83 -d Make jobs be dependent on the completion of the checkpoint runs
84 -e only echo pbs command info, don't actually send the job
85 -f force the job to run regardless of state
86 -q <queue> submit job to the named queue
87 -j <jobfile> specify the jobfile (default is <rootdir>/Test.py)
90 %(progname)s [-j <jobfile>] -l [-v] <regexp>
91 -j <jobfile> specify the jobfile (default is <rootdir>/Test.py)
92 -l list job names, don't submit
93 -v be verbose (list job parameters)
101 opts
, args
= getopt
.getopt(sys
.argv
[1:], '-Ccdefhj:lnq:Rt:v')
102 except getopt
.GetoptError
:
153 exprs
.append(re
.compile(arg
))
156 from job
import JobDir
, date
158 conf
= jobfile
.JobFile(jfile
)
160 if update
and not listonly
and not onlyecho
and isdir(conf
.linkdir
):
162 print 'Checking for outdated files in Link directory'
163 if not isdir(conf
.basedir
):
164 os
.mkdir(conf
.basedir
)
165 syncdir(conf
.linkdir
, conf
.basedir
)
170 if docpts
and doruns
:
173 gen
= conf
.checkpoints()
178 if job
.name
in jobnames
:
183 if expr
.match(job
.name
):
201 jobdir
= JobDir(joinpath(conf
.rootdir
, job
.name
))
204 status
= jobdir
.getstatus()
205 if status
== 'queued':
208 if status
== 'running':
211 if status
== 'success':
215 sys
.exit('job directory %s not clean!' % jobdir
)
class NameHack(object):
    """Report full job names to a helper service for long-named jobs.

    Job names of 15 characters or fewer are handled by PBS itself (see
    the comment in setname), so nothing is sent for them.  For longer
    names a "<jobid> <jobname>" line is written to a TCP service at
    host:port.  The connection is opened on first use and then reused.

    NOTE(review): the body of __init__ and the early return in setname
    were not present in the extracted source; they are restored from the
    attributes setname reads and from the original in-code comment.
    """

    def __init__(self, host='pbs.pool', port=24465):
        """Remember the service address; no connection is opened yet."""
        self.host = host
        self.port = port
        self.socket = None

    def setname(self, jobid, jobname):
        """Send the jobid/jobname pair if jobname is longer than 15 chars.

        Arguments:
          jobid   -- job id such as '1234.server'; only the part before
                     the first '.' is used and it must be an integer.
                     Plain integers are accepted as well.
          jobname -- full job name; surrounding whitespace is ignored

        Raises ValueError for a non-numeric jobid and socket errors when
        the service cannot be reached.
        """
        jobid = int(str(jobid).strip().split('.')[0])

        jobname = jobname.strip()
        # since pbs can handle jobnames of 15 characters or less,
        # don't use the raj hack.
        if len(jobname) <= 15:
            return

        if self.socket is None:
            # Connect to pbs.pool and send the jobid/jobname pair to port
            # 24465 (Raj didn't realize that there are only 64k ports and
            # set up inetd to point to port 90001)
            conn = socket.socket()
            try:
                conn.connect((self.host, self.port))
            except Exception:
                # Do not cache a socket that never connected; a later
                # call would otherwise try to send on it.
                conn.close()
                raise
            self.socket = conn

        message = "%s %s\n" % (jobid, jobname)
        if not isinstance(message, bytes):
            # Python 3 sockets need bytes; on Python 2 str already is bytes.
            message = message.encode('utf-8')
        # sendall() keeps writing until the whole line is out; plain send()
        # may transmit only part of it.
        self.socket.sendall(message)
249 namehack
= NameHack()
252 jobdir
= JobDir(joinpath(conf
.rootdir
, job
.name
))
254 cptdir
= JobDir(joinpath(conf
.rootdir
, job
.checkpoint
.name
))
255 cptjob
= cptdir
.readval('.pbs_jobid')
260 print 'Job name: %s' % job
.name
261 print 'Job directory: %s' % jobdir
264 qsub
.pbshost
= 'simpool.eecs.umich.edu'
265 qsub
.stdout
= jobdir
.file('jobout')
266 qsub
.name
= job
.name
[:15]
268 qsub
.node_type
= node_type
269 qsub
.env
['ROOTDIR'] = conf
.rootdir
270 qsub
.env
['JOBNAME'] = job
.name
272 qsub
.afterok
= cptjob
275 qsub
.build(joinpath(progpath
, 'job.py'))
278 print 'PBS Command: %s' % qsub
.command
284 print 'PBS Jobid: %s' % jobid
285 namehack
.setname(jobid
, job
.name
)
287 jobdir
.echofile('.pbs_jobid', jobid
)
288 jobdir
.echofile('.pbs_jobname', job
.name
)
289 jobdir
.echofile('.queued', queued
)
290 jobdir
.setstatus('queued on %s' % queued
)