2 # Copyright (c) 2006 The Regents of The University of Michigan
5 # Redistribution and use in source and binary forms, with or without
6 # modification, are permitted provided that the following conditions are
7 # met: redistributions of source code must retain the above copyright
8 # notice, this list of conditions and the following disclaimer;
9 # redistributions in binary form must reproduce the above copyright
10 # notice, this list of conditions and the following disclaimer in the
11 # documentation and/or other materials provided with the distribution;
12 # neither the name of the copyright holders nor the names of its
13 # contributors may be used to endorse or promote products derived from
14 # this software without specific prior written permission.
16 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 import os
, os
.path
, re
, socket
, sys
31 from os
import environ
as env
, listdir
32 from os
.path
import basename
, isdir
, isfile
, islink
, join
as joinpath
, normpath
33 from filecmp
import cmp as filecmp
34 from shutil
import copy
# Fragment of a path-rewriting helper. Its `def` line is not visible in this
# view; presumably this is `nfspath`, which is called further down -- confirm.
# A path under '/.automount/' is re-rooted at '/n/' by dropping that
# 12-character prefix. Any other path that is not already under '/n/' gets
# '/n/<short hostname>' prepended, where the short hostname is the part of
# socket.gethostname() before the first '.'.
37 if dir.startswith('/.automount/'):
38 dir = '/n/%s' % dir[12:]
39 elif not dir.startswith('/n/'):
40 dir = '/n/%s%s' % (socket
.gethostname().split('.')[0], dir)
# syncdir(srcdir, destdir): walk srcdir with os.walk and compare entries
# against their counterparts under destdir. Both paths are normalised first,
# and the program exits with a message if destdir is not an existing directory.
# NOTE(review): several statements of this function (some loop headers and
# branch bodies) are not visible in this view; the comments below describe
# only the lines that are shown.
43 def syncdir(srcdir
, destdir
):
44 srcdir
= normpath(srcdir
)
45 destdir
= normpath(destdir
)
# Refuse to continue when the destination directory is missing.
46 if not isdir(destdir
):
47 sys
.exit('destination directory "%s" does not exist' % destdir
)
# Visit every directory below srcdir.
49 for root
, dirs
, files
in os
.walk(srcdir
):
# Strip the leading part that root shares with srcdir, leaving the path
# relative to srcdir (possibly with a leading '/', tested below).
51 prefix
= os
.path
.commonprefix([root
, srcdir
])
52 root
= root
[len(prefix
):]
53 if root
.startswith('/'):
# Select hidden directories (leading '.') and directories named 'SCCS'.
# What is done with each selected name is not visible in this view.
55 for rem
in [ d
for d
in dirs
if d
.startswith('.') or d
== 'SCCS']:
# Path of the corresponding directory on the destination side.
59 newdir
= joinpath(destdir
, root
, entry
)
# Replace every symlinked directory name in dirs with '<name>/.'.
# NOTE(review): presumably so that os.walk descends through the link -- confirm.
64 for i
,d
in enumerate(dirs
):
65 if islink(joinpath(srcdir
, root
, d
)):
66 dirs
[i
] = joinpath(d
, '.')
# Normalised destination and source paths for one entry.
69 dest
= normpath(joinpath(destdir
, root
, entry
))
70 src
= normpath(joinpath(srcdir
, root
, entry
))
# Act when the destination file is missing or differs from the source.
71 if not isfile(dest
) or not filecmp(src
, dest
):
# NOTE(review): the message prints dest before src -- confirm that this
# is the intended order.
72 print 'copy %s %s' % (dest
, src
)
# Top-level script, part 1: locate the program, parse options, load the job
# file, optionally sync the Link directory, and select/check jobs.
# NOTE(review): many statements of this section are not visible in this view;
# the comments describe only the lines that are shown.
#
# progpath is the first sys.path entry passed through nfspath; progname is the
# base name the script was invoked with.
75 progpath
= nfspath(sys
.path
[0])
76 progname
= basename(sys
.argv
[0])
79 %(progname)s [-c] [-e] [-f] [-j <jobfile>] [-q queue] [-v] <regexp>
80 -c clean directory if job can be run
81 -C submit the checkpointing runs
82 -d Make jobs be dependent on the completion of the checkpoint runs
83 -e only echo pbs command info, don't actually send the job
84 -f force the job to run regardless of state
85 -q <queue> submit job to the named queue
86 -j <jobfile> specify the jobfile (default is <rootdir>/Test.py)
89 %(progname)s [-j <jobfile>] -l [-v] <regexp>
90 -j <jobfile> specify the jobfile (default is <rootdir>/Test.py)
91 -l list job names, don't submit
92 -v be verbose (list job parameters)
# Parse the command line.
# NOTE(review): the option string also accepts -h, -n, -R and -t <arg>, which
# the usage text visible above does not describe -- confirm whether the usage
# text should list them.
100 opts
, args
= getopt
.getopt(sys
.argv
[1:], '-Ccdefhj:lnq:Rt:v')
101 except getopt
.GetoptError
:
# Compile a pattern and add it to exprs; the patterns are matched against job
# names further down.
152 exprs
.append(re
.compile(arg
))
# Project-local modules.
154 import jobfile
, batch
155 from job
import JobDir
, date
# Load the job description from jfile.
157 conf
= jobfile
.JobFile(jfile
)
# When updating (and neither listing only nor echoing only) and the link
# directory exists, create basedir if needed and sync linkdir into it.
159 if update
and not listonly
and not onlyecho
and isdir(conf
.linkdir
):
161 print 'Checking for outdated files in Link directory'
162 if not isdir(conf
.basedir
):
163 os
.mkdir(conf
.basedir
)
164 syncdir(conf
.linkdir
, conf
.basedir
)
# Branch taken when both checkpoint runs and regular runs were requested
# (its body is not visible in this view).
169 if docpts
and doruns
:
# Job source obtained from the job file's checkpoints() method.
172 gen
= conf
.checkpoints()
# Test the job name against the names already collected.
177 if job
.name
in jobnames
:
# re match() anchors at the start of the job name.
182 if expr
.match(job
.name
):
# Directory for this job under the configured root directory.
200 jobdir
= JobDir(joinpath(conf
.rootdir
, job
.name
))
# Examine the status recorded for the job directory; the action taken for
# each status value is not visible in this view.
203 status
= jobdir
.getstatus()
204 if status
== 'queued':
207 if status
== 'running':
210 if status
== 'success':
# Abort the whole program when a job directory is not clean.
214 sys
.exit('job directory %s not clean!' % jobdir
)
# NameHack: sends "jobid jobname" pairs over a socket to a naming service.
# The constructor defaults are host 'pbs.pool' and port 24465.
# NOTE(review): parts of the class body (including the constructor body) are
# not visible in this view.
220 class NameHack(object):
221 def __init__(self
, host
='pbs.pool', port
=24465):
# setname(jobid, jobname): report the name of a submitted job.
226 def setname(self
, jobid
, jobname
):
# Keep only the integer part of the job id that precedes the first '.'.
230 jobid
= int(jobid
.strip().split('.')[0])
232 jobname
= jobname
.strip()
233 # PBS can handle job names of 15 characters or fewer, so the Raj
234 # hack is not used for them.
235 if len(jobname
) <= 15:
# A socket object is created when self.socket is still None.
238 if self
.socket
is None:
240 self
.socket
= socket
.socket()
241 # Connect to pbs.pool and send the jobid/jobname pair to port
242 # 24465 (Raj didn't realize that there are only 64k ports and
243 # set up inetd to point to port 90001)
244 self
.socket
.connect((self
.host
, self
.port
))
# Wire format: "<jobid> <jobname>" terminated by a newline.
246 self
.socket
.send("%s %s\n" % (jobid
, jobname
))
# Top-level script, part 2: submit each selected job through batch.oarsub and
# record the submission in the job directory.
# NOTE(review): loop headers and several statements of this section are not
# visible in this view; the comments describe only the lines that are shown.
#
# The NameHack instance is created here, but its only visible use below is
# commented out.
248 namehack
= NameHack()
250 rootdir
= conf
.rootdir
# Path of the job script, <rootdir>/Base/job.py.
251 script
= joinpath(rootdir
, 'Base', 'job.py')
254 jobdir
= JobDir(joinpath(rootdir
, job
.name
))
# Directory of the checkpoint job that this job refers to.
256 cptdir
= JobDir(joinpath(rootdir
, job
.checkpoint
.name
))
# True when path is not a directory or contains no '.success' file.
258 if not isdir(path
) or not isfile(joinpath(path
, '.success')):
# Batch job id previously recorded for the checkpoint job.
261 cptjob
= cptdir
.readval('.batch_jobid')
# Work from inside the job directory and keep $PWD consistent with it.
265 os
.chdir(str(jobdir
))
266 os
.environ
['PWD'] = str(jobdir
)
268 print 'Job name: %s' % job
.name
269 print 'Job directory: %s' % jobdir
# Build the submission object and point it at the OAR host.
272 qsub
= batch
.oarsub()
273 qsub
.oarhost
= 'poolfs.eecs.umich.edu'
274 #qsub.stdout = jobdir.file('jobout')
278 #qsub.node_type = node_type
279 #qsub.env['ROOTDIR'] = conf.rootdir
280 #qsub.env['JOBNAME'] = job.name
282 # qsub.afterok = cptjob
# Property expression that is true whether 64bit is 'Yes' or 'No'.
285 qsub
.properties
= "64bit = 'Yes' or 64bit = 'No'"
# NOTE(review): the 'cwd' line prints qsub.command, the same value as the
# 'PBS Command' line that follows -- confirm this is intended. The label says
# PBS although the object comes from batch.oarsub().
289 print 'cwd: %s' % qsub
.command
290 print 'PBS Command: %s' % qsub
.command
296 print 'OAR Jobid: %s' % jobid
297 #namehack.setname(jobid, job.name)
# Record the job id, job name and queue value in the job directory, then mark
# the job as queued.
299 jobdir
.echofile('.batch_jobid', jobid
)
300 jobdir
.echofile('.batch_jobname', job
.name
)
301 jobdir
.echofile('.queued', queued
)
302 jobdir
.setstatus('queued on %s' % queued
)