ruby: slicc: use default argument value
[gem5.git] / util / multi / gem5-multi.sh
1 #! /bin/bash
2
3 #
4 # Copyright (c) 2015 ARM Limited
5 # All rights reserved
6 #
7 # The license below extends only to copyright in the software and shall
8 # not be construed as granting a license to any other intellectual
9 # property including but not limited to intellectual property relating
10 # to a hardware implementation of the functionality of the software
11 # licensed hereunder. You may use the software subject to the license
12 # terms below provided that you ensure that this notice is replicated
13 # unmodified and in its entirety in all distributions of the software,
14 # modified or unmodified, in source code or in binary form.
15 #
16 # Redistribution and use in source and binary forms, with or without
17 # modification, are permitted provided that the following conditions are
18 # met: redistributions of source code must retain the above copyright
19 # notice, this list of conditions and the following disclaimer;
20 # redistributions in binary form must reproduce the above copyright
21 # notice, this list of conditions and the following disclaimer in the
22 # documentation and/or other materials provided with the distribution;
23 # neither the name of the copyright holders nor the names of its
24 # contributors may be used to endorse or promote products derived from
25 # this software without specific prior written permission.
26 #
27 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
30 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
31 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
32 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
33 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
34 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
35 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
36 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
37 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 #
39 # Authors: Gabor Dozsa
40
41
42 # This is a wrapper script to run multi gem5 simulations.
43 # See the usage_func() below for hints on how to use it. Also,
44 # there are some examples in the util/multi directory (e.g.
45 # see util/multi/test-2nodes-AArch64.sh)
46 #
47 #
48 # Allocated hosts/cores are assumed to be listed in the LSB_MCPU_HOSTS
49 # environment variable (which is what LSF does by default).
50 # E.g. LSB_MCPU_HOSTS="hname1 2 hname2 4" means we have altogether 6 slots
51 # allocated to launch the gem5 processes, 2 of them are on host hname1
52 # and 4 of them are on host hname2.
53 # If LSB_MCPU_HOSTS environment variable is not defined then we launch all
54 # processes on the localhost.
55 #
56 # Each gem5 process is passed a unique rank ID [0..N-1] via the kernel
57 # boot params. The total number of gem5 processes is also passed in.
58 # These values can be used in the boot script to configure the MAC/IP
59 # addresses - among other things (see util/multi/bootscript.rcS).
60 #
61 # Each gem5 process will create an m5out.$GEM5_RANK directory for
62 # the usual output files. Furthermore, there will be a separate log file
63 # for each ssh session (we use ssh to start gem5 processes) and one for
64 # the server. These are called log.$GEM5_RANK and log.server.
65 #
66
67
# Print the command line help to stdout: a synopsis line, one line per
# option/argument and a closing note about the localhost fallback.
#   Globals:   $0 (read) - shown in the synopsis line
#   Arguments: none
usage_func ()
{
    # a single printf emits every line; '%s\n' is re-applied per argument
    printf '%s\n' \
        "Usage:$0 [-debug] [-n nnodes] [-s server] [-p port] gem5_exe gem5_args" \
        " -debug : debug mode (start gem5 in gdb)" \
        " nnodes : number of gem5 processes" \
        " server : message server executable" \
        " port : message server listen port" \
        " gem5_exe : gem5 executable (full path required)" \
        " gem5_args: usual gem5 arguments ( m5 options, config script options)" \
        "Note: if no LSF slots allocation is found all processes are launched on the localhost."
}
80
81
# Process (optional) command line options

# parse_cmdline_opts ARG...
#   Scans the leading words of its argument list for the options
#   -n|-nodes N, -s|-server EXE, -p|-port PORT and -debug, and stops at the
#   first word that is none of them.
#   Globals written: NNODES, TCP_SERVER, SERVER_PORT, GEM5_DEBUG (only for the
#                    options that are present) and OPTS_CONSUMED (number of
#                    words that belong to the recognised options).
#   Returns: 0 on success, 1 if a value-taking option is the last word.
#            Without that check 'shift 2' fails, nothing is shifted and the
#            parsing loop never terminates.
parse_cmdline_opts ()
{
    OPTS_CONSUMED=0
    while (($# > 0))
    do
        case "$1" in
            -n|-nodes|-s|-server|-p|-port)
                if (($# < 2))
                then
                    echo "(E) Option $1 requires an argument" >&2
                    return 1
                fi
                case "$1" in
                    -n|-nodes)
                        NNODES=$2
                        ;;
                    -s|-server)
                        TCP_SERVER=$2
                        ;;
                    -p|-port)
                        SERVER_PORT=$2
                        ;;
                esac
                shift 2
                ((OPTS_CONSUMED+=2))
                ;;
            -debug)
                GEM5_DEBUG="-debug"
                shift 1
                ((OPTS_CONSUMED+=1))
                ;;
            *)
                break
                ;;
        esac
    done
    return 0
}

parse_cmdline_opts "$@" || { usage_func; exit -1; }
# drop the option words so that $1 is the gem5 executable
shift "$OPTS_CONSUMED"
108
# Whatever is left on the command line has to be the gem5 command itself:
# the executable followed by at least one argument.
if (($# < 2))
then
    usage_func
    exit -1
fi
GEM5_EXE=$1
shift
# all remaining words are joined into one string
GEM5_ARGS="$*"
114
# Default values to use (in case they are not defined as command line options)
# $0 is quoted so that a script path containing whitespace still resolves.
DEFAULT_TCP_SERVER="$(dirname "$0")/../../util/multi/tcp_server"
DEFAULT_SERVER_PORT=2200

# keep any value that is already set and non-empty, otherwise use the default
TCP_SERVER=${TCP_SERVER:-$DEFAULT_TCP_SERVER}
SERVER_PORT=${SERVER_PORT:-$DEFAULT_SERVER_PORT}
NNODES=${NNODES:-2}
122
123
# Make sure that both programs we are about to launch exist and are
# executable (the message server is checked first, then gem5).
for exe_path in "$TCP_SERVER" "$GEM5_EXE"
do
    if [ ! -x "$exe_path" ]
    then
        echo "Executable ${exe_path} not found"
        exit 1
    fi
done
127
128
declare -a SSH_PIDS
declare -a HOSTS
declare -a NCORES

# Find out which cluster hosts/slots are allocated or
# use localhost if there is no LSF allocation.
# We assume that allocated slots are listed in the LSB_MCPU_HOSTS
# environment variable in the form:
# host1 nslots1 host2 nslots2 ...
# (This is what LSF does by default.)

# parse_host_slots SPEC
#   Splits SPEC ("host1 nslots1 host2 nslots2 ...") into the parallel arrays
#   HOSTS and NCORES and sums all slot counts into NH.
#   A host is only recorded once its slot count has been seen, so the two
#   arrays always have the same length. A trailing host without a count is
#   ignored with a warning on stderr.
#   Globals written: HOSTS, NCORES, NH
#   Returns: 0
parse_host_slots ()
{
    local token
    local pending_host=""

    HOSTS=()
    NCORES=()
    NH=0
    # $1 is left unquoted on purpose: SPEC is a whitespace separated list
    for token in $1
    do
        if [ -z "$pending_host" ]
        then
            pending_host=$token
        else
            HOSTS+=("$pending_host")
            NCORES+=("$token")
            ((NH+=token))
            pending_host=""
        fi
    done
    if [ -n "$pending_host" ]
    then
        echo "(W) Ignoring host '$pending_host': no slot count given" >&2
    fi
    return 0
}

[ "x$LSB_MCPU_HOSTS" != "x" ] || LSB_MCPU_HOSTS="localhost $NNODES"
parse_host_slots "$LSB_MCPU_HOSTS"
((NNODES==NH)) || { echo "(E) Number of cluster slots ($NH) and gem5 instances ($NNODES) differ"; exit -1; }
#echo "hosts: ${HOSTS[@]}"
#echo "ncores: ${NCORES[@]}"
#echo ${#HOSTS[@]}
158
159
# Clean up and abort if something goes wrong.
# Prints a "KILLED <date>" banner, runs killall for the gem5 executable name
# locally and (via ssh) on every host in HOSTS, waits 3 seconds, kills the
# message server and the watchdog if their pids are known, then exits.
#   Globals read: GEM5_EXE, HOSTS, SERVER_PID, WATCHDOG_PID
#   Arguments:    none
#   Exit status:  -1 (never returns)
abort_func ()
{
    local exe_name
    local host

    echo
    echo "KILLED $(date)"
    # (try to) kill all gem5 processes on all hosts
    # quoted so that a path containing whitespace is not split into several
    # basename operands
    exe_name=$(basename "$GEM5_EXE")
    killall -q "$exe_name"
    for host in "${HOSTS[@]}"
    do
        ssh "$host" killall -q "$exe_name"
    done
    sleep 3
    # kill the message server and the watchdog
    if [ -n "$SERVER_PID" ]
    then
        kill "$SERVER_PID" 2>/dev/null
    fi
    if [ -n "$WATCHDOG_PID" ]
    then
        kill "$WATCHDOG_PID" 2>/dev/null
    fi
    exit -1
}
178
179
# We need a watchdog to trigger full clean up if a gem5 process dies.
# Loops forever: every 30 seconds it probes each pid in SSH_PIDS and the
# message server pid with 'kill -0'. As soon as at least one of them is gone
# it waits another 60 seconds, prints a notice and calls abort_func.
#   Globals read: SSH_PIDS, SERVER_PID
#   Arguments:    none
watchdog_func ()
{
    local ndead
    local pid

    while true
    do
        sleep 30
        ndead=0
        for pid in "${SSH_PIDS[@]}"
        do
            kill -0 "$pid" 2>/dev/null || ((ndead+=1))
        done
        # stderr is silenced as it is for the ssh pids above: a server that
        # has already exited is an expected condition here
        kill -0 "$SERVER_PID" 2>/dev/null || ((ndead+=1))
        if ((ndead > 0))
        then
            # we may be in the middle of an orderly termination,
            # give it some time to complete before reporting abort
            sleep 60
            echo -n "(I) (some) gem5 process(es) exited"
            abort_func
        fi
    done
}
202
# This function launches the gem5 processes. We use it only to allow launching
# gem5 processes under gdb control (in the foreground) for debugging.
#   Arguments: $1  - rank, used as the suffix of the log file name (log.$1)
#              $2  - host to ssh to
#              $3  - environment assignments ("VAR=value ...") that are put
#                    in front of the remote command
#              $4… - the command line to run
#   Globals read: GEM5_DEBUG (non-empty selects the gdb branch)
#   Note: in the ssh branch the job is started in the background of the
#         current shell, so the caller can pick up its pid from $!.
start_func ()
{
    local rank=$1
    local host=$2
    local env_args=$3
    shift 3
    if [ -n "$GEM5_DEBUG" ]
    then
        gdb --args "$@"
    else
        # env_args is deliberately left unquoted: it holds several
        # space separated VAR=value words
        ssh "$host" $env_args "$@" &>"log.$rank" &
    fi
}
218
219
# Trigger full clean up in case we are being killed by external signal
trap 'abort_func' INT TERM

# env args to be passed explicitly to gem5 processes started via ssh
ENV_ARGS="LD_LIBRARY_PATH=$LD_LIBRARY_PATH M5_PATH=$M5_PATH"

# launch the message server and check if it has started okay
# (GEM5_DEBUG stays unquoted so that an empty value adds no argument at all;
# the server path is quoted so that it may contain whitespace)
"$TCP_SERVER" $GEM5_DEBUG "$NNODES" "$SERVER_PORT" &>log.server &
SERVER_PID=$!
sleep 2
if ! kill -0 "$SERVER_PID"
then
    echo "Failed to start message server"
    exit -1
fi
231
# Now launch all the gem5 processes with ssh.
# One process is started per slot of every host; n is the rank of the process
# being started and also the index under which its pid is stored in SSH_PIDS.
echo "START $(date)"
n=0
for ((i=0; i < ${#HOSTS[@]}; i++))
do
    h=${HOSTS[i]}
    for ((j=0; j < ${NCORES[i]}; j++))
    do
        echo "starting gem5 on $h ..."
        # assemble the gem5 command line first (the expansions are left
        # unquoted so that GEM5_ARGS is split back into separate words)
        gem5_cmd=(
            $GEM5_EXE
            -d $(pwd)/m5out.$n
            $GEM5_ARGS
            --multi
            --multi-rank=$n
            --multi-server-name=${HOSTS[0]}
            --multi-server-port=$SERVER_PORT
            --testsys-toplevel-LinuxArmSystem.boot_osflags="\"GEM5_RANK=$n GEM5_SIZE=$NNODES\""
        )
        start_func $n $h "$ENV_ARGS" "${gem5_cmd[@]}"
        # must directly follow start_func: $! is the job it just started
        SSH_PIDS[$n]=$!
        n=$((n + 1))
    done
done
251
# In debug mode there is nothing left to supervise: stop the message server
# and leave.
if [ -n "$GEM5_DEBUG" ]
then
    kill $SERVER_PID
    echo "DEBUG exit"
    exit -1
fi

# start watchdog to trigger complete abort (after a grace period) if any
# gem5 process dies
watchdog_func &
WATCHDOG_PID=$!
258
# wait for exit statuses
# NFAIL counts every waited-for process (the ssh sessions, then the message
# server) whose 'wait' reports a non-zero status
NFAIL=0
for p in ${SSH_PIDS[*]}
do
    if ! wait $p
    then
        NFAIL=$((NFAIL + 1))
    fi
done
if ! wait $SERVER_PID
then
    NFAIL=$((NFAIL + 1))
fi

# all done, let's terminate the watchdog
kill $WATCHDOG_PID 2>/dev/null

if ((NFAIL != 0))
then
    echo "ABORT $(date)"
else
    echo "EXIT $(date)"
fi