4 # Copyright (c) 2015 ARM Limited
7 # The license below extends only to copyright in the software and shall
8 # not be construed as granting a license to any other intellectual
9 # property including but not limited to intellectual property relating
10 # to a hardware implementation of the functionality of the software
11 # licensed hereunder. You may use the software subject to the license
12 # terms below provided that you ensure that this notice is replicated
13 # unmodified and in its entirety in all distributions of the software,
14 # modified or unmodified, in source code or in binary form.
16 # Copyright (c) 2015 University of Illinois Urbana Champaign
19 # Redistribution and use in source and binary forms, with or without
20 # modification, are permitted provided that the following conditions are
21 # met: redistributions of source code must retain the above copyright
22 # notice, this list of conditions and the following disclaimer;
23 # redistributions in binary form must reproduce the above copyright
24 # notice, this list of conditions and the following disclaimer in the
25 # documentation and/or other materials provided with the distribution;
26 # neither the name of the copyright holders nor the names of its
27 # contributors may be used to endorse or promote products derived from
28 # this software without specific prior written permission.
30 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
31 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
32 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
33 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
34 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
35 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
36 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
37 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
38 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
39 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
40 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# This is a wrapper script to run dist gem5 simulations.
# See the usage_func() below for hints on how to use it. Also,
# there are some examples in the util/dist directory (e.g.
# see util/dist/test-2nodes-AArch64.sh)
# Allocated hosts/cores are assumed to be listed in the LSB_MCPU_HOSTS
# environment variable (which is what LSF does by default).
# E.g. LSB_MCPU_HOSTS="hname1 2 hname2 4" means we have altogether 6 slots
# allocated to launch the gem5 processes, 2 of them are on host hname1
# and 4 of them are on host hname2.
# If the LSB_MCPU_HOSTS environment variable is not defined then we launch
# all processes on the localhost.
# Each gem5 process is passed a unique rank ID [0..N-1] via the kernel
# boot params. The total number of gem5 processes is also passed in.
# These values can be used in the boot script to configure the MAC/IP
# addresses - among other things (see util/dist/bootscript.rcS).
62 # Each gem5 process will create an m5out.$GEM5_RANK directory for
63 # the usual output files. Furthermore, there will be a separate log file
64 # for each ssh session (we use ssh to start gem5 processes) and one for
65 # the server. These are called log.$GEM5_RANK and log.switch.
72 echo "Usage:$0 [-debug] [-n nnodes] [-r rundir] [-c ckptdir] [-p port] [-sw switch] [--sw-args sw_args] [-fs fullsystem] [--fs-args fs_args] [--cf-args conf_args] [--m5-args m5_args] -x gem5_exe "
73 echo " -debug : debug mode (start gem5 in gdb)"
74 echo " nnodes : number of gem5 processes"
75 echo " rundir : run simulation under this path. If not specified, current dir will be used"
76 echo " ckptdir : dump/restore checkpoints to/from this path. If not specified, current dir will be used"
78 echo " fullsystem: fullsystem config file"
79 echo " fs_args : fullsystem config specific argument list: arg1 arg2 ..."
80 echo " port : switch listen port"
81 echo " switch : switch config file"
82 echo " sw_args : switch config specific argument list: arg1 arg2 ..."
83 echo " conf_args : common (for both fullsystem and switch) config argument list: arg1 arg2 ..."
84 echo " gem5_exe : gem5 executable (full path required)"
85 echo " m5_args : common m5 argument list (e.g. debug flags): arg1 arg2 ..."
86 echo "Note: if no LSF slots allocation is found all proceses are launched on the localhost."
89 # Process (optional) command line options
146 [ -n "$CUR_ARGS" ] ||
{ echo "Unexpected arg: $1"; usage_func
; exit -1; }
149 eval $CUR_ARGS=\"${!CUR_ARGS} $1\"
153 eval $CUR_ARGS=\"${!CUR_ARGS} $1 $2\"
159 echo "Unknown arg: $1"
# Default values to use (in case they are not defined as command line options).
# NOTE(review): DEFAULT_SW_PORT is used below as the fallback for SW_PORT but
# is not assigned in this section - confirm it is defined before this point,
# otherwise SW_PORT stays empty when no port is given on the command line.
DEFAULT_FS_CONFIG=$M5_PATH/configs/example/fs.py
DEFAULT_SW_CONFIG=$M5_PATH/configs/dist/sw.py

# Fill in every option that is still unset or empty. ${VAR:=word} assigns
# 'word' only in that case, so values supplied by the caller are kept.
: "${FS_CONFIG:=$DEFAULT_FS_CONFIG}"
: "${SW_CONFIG:=$DEFAULT_SW_CONFIG}"
: "${SW_PORT:=$DEFAULT_SW_PORT}"
: "${NNODES:=2}"
: "${RUN_DIR:=$(pwd)}"
: "${CKPT_DIR:=$(pwd)}"
# Check if all the files we need exist: the two config scripts must be
# regular files and the gem5 binary must be executable. Each failed check
# prints a message and terminates the script with status 1.
[ -f "$FS_CONFIG" ] || { echo "FS config ${FS_CONFIG} not found"; exit 1; }
[ -f "$SW_CONFIG" ] || { echo "Switch config ${SW_CONFIG} not found"; exit 1; }
[ -x "$GEM5_EXE" ] || { echo "Executable ${GEM5_EXE} not found"; exit 1; }

# make sure that RUN_DIR exists (quoted so a path with spaces stays one
# argument; output is discarded on purpose)
mkdir -p "$RUN_DIR" > /dev/null 2>&1
# Find out which cluster hosts/slots are allocated or
# use localhost if there is no LSF allocation.
# We assume that allocated slots are listed in the LSB_MCPU_HOSTS
# environment variable in the form:
# host1 nslots1 host2 nslots2 ...
# (This is what LSF does by default.)
# Without an allocation, all NNODES slots are placed on 127.0.0.1.
[ -n "$LSB_MCPU_HOSTS" ] || LSB_MCPU_HOSTS="127.0.0.1 $NNODES"
198 for hc
in $LSB_MCPU_HOSTS
200 if [ "x$host" == "x" ]
# The number of cluster slots (NH) must equal the number of gem5 instances
# requested (NNODES). The message reports NNODES, which is the value that is
# actually compared. 'exit -1' is kept as in the rest of the script; bash
# reports it to the caller as status 255.
((NNODES == NH)) || {
    echo "(E) Number of cluster slots ($NH) and gem5 instances ($NNODES) differ"
    exit -1
}
212 # function to clean up and abort if something goes wrong
216 echo "KILLED $(date)"
217 # Try to Kill the server first. That should trigger an exit for all connected
219 [ "x$SW_PID" != "x" ] && kill $SW_PID 2>/dev
/null
221 # (try to) kill gem5 processes - just in case something went wrong with the
222 # server triggered exit
223 bname
=$
(basename $GEM5_EXE)
224 killall
-q -s SIGKILL
$bname
227 ssh $h killall
-q -s SIGKILL
$bname
231 [ "x$WATCHDOG_PID" != "x" ] && kill $WATCHDOG_PID 2>/dev
/null
235 # We need a watchdog to trigger full clean up if a gem5 process dies
242 for p
in ${SSH_PIDS[*]}
244 kill -0 $p 2>/dev
/null ||
((NDEAD
+=1))
246 kill -0 $SW_PID ||
((NDEAD
+=1))
249 # we may be in the middle of an orderly termination,
250 # give it some time to complete before reporting abort
252 echo -n "(I) (some) gem5 process(es) exited"
258 # This function launches the gem5 processes. The only purpose is to enable
259 # launching gem5 processes under gdb control for debugging
266 if [ "x$GEM5_DEBUG" != "x" ]
268 echo "DEBUG starting terminal..."
270 xterm
-e "gdb --args $MY_ARGS" &
272 ssh $HOST $ENV_ARGS "$@" &> $RUN_DIR/log.
$N &
276 # block till the gem5 process starts
281 echo -n "waiting for $3 to start "
284 kill -0 $4 ||
{ echo "Failed to start $3"; exit -1; }
285 [[ -f "$FILE" ]] && \
286 grep -q "$STRING" "$FILE" && \
287 echo -e "\nnode #$3 started" && \
# Trigger full clean up in case we are being killed by external signal
trap 'abort_func' INT TERM

# env args to be passed explicitly to gem5 processes started via ssh.
# The values are captured here, at assignment time.
ENV_ARGS="LD_LIBRARY_PATH=$LD_LIBRARY_PATH M5_PATH=$M5_PATH"
# clean up log files before starting gem5 processes
# (a missing log file is not an error, so all output is discarded)
rm "$RUN_DIR/log.switch" > /dev/null 2>&1

# make sure that the switch's directory under CKPT_DIR exists
mkdir -p "$CKPT_DIR/m5out.switch" > /dev/null 2>&1
308 echo "launch switch gem5 process on $SW_HOST ..."
309 start_func
"switch" $SW_HOST "$ENV_ARGS" $GEM5_EXE -d $RUN_DIR/m5out.switch \
314 --checkpoint-dir=$CKPT_DIR/m5out.switch \
316 --dist-size=$NNODES \
317 --dist-server-port=$SW_PORT
320 # block here till switch process starts
321 connected
$RUN_DIR/log.switch
"tcp_iface listening on port" "switch" $SW_PID
322 LINE
=$
(grep -r "tcp_iface listening on port" $RUN_DIR/log.switch
)
324 IFS
=' ' read -ra ADDR
<<< "$LINE"
325 # actual port that switch is listening on may be different
326 # from what we specified if the port was busy
329 # Now launch all the gem5 processes with ssh.
332 for ((i
=0; i
< ${#HOSTS[@]}; i
++))
335 for ((j
=0; j
< ${NCORES[i]}; j
++))
337 #cleanup log files before starting gem5 processes
338 rm $RUN_DIR/log.
$n > /dev
/null
2>&1
339 # make sure that CKPT_DIR exists
340 mkdir
-p $CKPT_DIR/m5out.
$n > /dev
/null
2>&1
341 echo "starting gem5 on $h ..."
342 start_func
$n $h "$ENV_ARGS" $GEM5_EXE -d $RUN_DIR/m5out.
$n \
347 --checkpoint-dir=$CKPT_DIR/m5out.
$n \
350 --dist-size=$NNODES \
351 --dist-server-name=${HOSTS[0]} \
352 --dist-server-port=$SW_PORT
# Wait here if it is a debug session (GEM5_DEBUG non-empty): block until the
# process recorded in SW_PID exits, then leave with 'exit -1' (status 255 in
# bash) so that nothing after this point runs.
# SW_PID is deliberately left unquoted: if it is empty, 'wait' without
# arguments waits for all background children, as before.
if [ -n "$GEM5_DEBUG" ]; then
    echo "DEBUG session"
    wait $SW_PID
    exit -1
fi
361 # start watchdog to trigger complete abort (after a grace period) if any
366 # wait for exit statuses
368 for p
in ${SSH_PIDS[*]}
370 wait $p ||
((NFAIL
+=1))
372 wait $SW_PID ||
((NFAIL
+=1))
# all done, let's terminate the watchdog
# (errors are discarded: the watchdog may already have exited)
kill "$WATCHDOG_PID" 2>/dev/null