From 8b9baa66ad6ffc9e323b83bc967992439a7d9bfa Mon Sep 17 00:00:00 2001 From: Andres Noetzli Date: Mon, 16 Apr 2018 21:08:38 -0700 Subject: [PATCH] Add timeout (option) to regression script (#1786) This commit adds the option to run regressions with a timeout using the `TEST_TIMEOUT` environment variable. The default timeout is set to 10 minutes. This should make it less likely that our nightly builds hang and makes it easier to sort out slow tests. Default timeout tested with regression level 2 on a debug build with proofs. --- test/regress/Makefile.am | 4 +- test/regress/README.md | 11 ++++- test/regress/run_regression.py | 84 +++++++++++++++++++--------------- 3 files changed, 60 insertions(+), 39 deletions(-) diff --git a/test/regress/Makefile.am b/test/regress/Makefile.am index 19cbbae4a..ba1442c69 100644 --- a/test/regress/Makefile.am +++ b/test/regress/Makefile.am @@ -57,5 +57,5 @@ EXTRA_DIST = \ regress0/tptp/Axioms/SYN000_0.ax \ Makefile.levels \ Makefile.tests \ - run_regression.py \ - README.md \ No newline at end of file + run_regression.py \ + README.md diff --git a/test/regress/README.md b/test/regress/README.md index 2c347bc48..34cf7efde 100644 --- a/test/regress/README.md +++ b/test/regress/README.md @@ -16,7 +16,16 @@ For example: TEST_REGEX=quantifiers make regress0 ``` -Runs regression tests from level 0 that have "quantifiers" in their name. +This runs regression tests from level 0 that have "quantifiers" in their name. + +By default, each invocation of CVC4 is done with a 10 minute timeout. To use a +different timeout, set the `TEST_TIMEOUT` environment variable: + +``` +TEST_TIMEOUT=0.5s make regress0 +``` + +This runs regression tests from level 0 with a 0,5 second timeout. ## Adding New Regressions diff --git a/test/regress/run_regression.py b/test/regress/run_regression.py index a45489d6a..260ab3284 100755 --- a/test/regress/run_regression.py +++ b/test/regress/run_regression.py @@ -15,6 +15,7 @@ import re import shlex import subprocess import sys +import threading SCRUBBER = 'SCRUBBER: ' ERROR_SCRUBBER = 'ERROR-SCRUBBER: ' @@ -24,8 +25,36 @@ EXIT = 'EXIT: ' COMMAND_LINE = 'COMMAND-LINE: ' +def run_process(args, cwd, timeout, s_input=None): + """Runs a process with a timeout `timeout` in seconds. `args` are the + arguments to execute, `cwd` is the working directory and `s_input` is the + input to be sent to the process over stdin. Returns the output, the error + output and the exit code of the process. If the process times out, the + output and the error output are empty and the exit code is 124.""" + + proc = subprocess.Popen( + args, + cwd=cwd, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + + out = '' + err = '' + exit_status = 124 + timer = threading.Timer(timeout, lambda p: p.kill(), [proc]) + try: + timer.start() + out, err = proc.communicate(input=s_input) + exit_status = proc.returncode + finally: + timer.cancel() + + return out, err, exit_status + + def run_benchmark(dump, wrapper, scrubber, error_scrubber, cvc4_binary, - command_line, benchmark_dir, benchmark_filename): + command_line, benchmark_dir, benchmark_filename, timeout): """Runs CVC4 on the file `benchmark_filename` in the directory `benchmark_dir` using the binary `cvc4_binary` with the command line options `command_line`. The output is scrubbed using `scrubber` and @@ -44,44 +73,24 @@ def run_benchmark(dump, wrapper, scrubber, error_scrubber, cvc4_binary, '--preprocess-only', '--dump', 'raw-benchmark', '--output-lang=smt2', '-qq' ] - dump_process = subprocess.Popen( + dump_output, _, _ = run_process( bin_args + command_line + dump_args + [benchmark_filename], - cwd=benchmark_dir, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) - dump_output, _ = dump_process.communicate() - process = subprocess.Popen( - bin_args + command_line + ['--lang=smt2', '-'], - cwd=benchmark_dir, - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) - output, error = process.communicate(input=dump_output) - exit_status = process.returncode + benchmark_dir, timeout) + output, error, exit_status = run_process( + bin_args + command_line + ['--lang=smt2', '-'], benchmark_dir, + timeout, dump_output) else: - process = subprocess.Popen( - bin_args + command_line + [benchmark_filename], - cwd=benchmark_dir, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) - output, error = process.communicate() - exit_status = process.returncode + output, error, exit_status = run_process( + bin_args + command_line + [benchmark_filename], benchmark_dir, + timeout) # If a scrubber command has been specified then apply it to the output. if scrubber: - scrubber_process = subprocess.Popen( - shlex.split(scrubber), - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) - output, _ = scrubber_process.communicate(input=output) + output, _, _ = run_process( + shlex.split(scrubber), benchmark_dir, timeout, output) if error_scrubber: - error_scrubber_process = subprocess.Popen( - shlex.split(error_scrubber), - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) - error, _ = error_scrubber_process.communicate(input=error) + error, _, _ = run_process( + shlex.split(error_scrubber), benchmark_dir, timeout, error) # Popen in Python 3 returns a bytes object instead of a string for # stdout/stderr. @@ -92,7 +101,7 @@ def run_benchmark(dump, wrapper, scrubber, error_scrubber, cvc4_binary, return (output.strip(), error.strip(), exit_status) -def run_regression(proof, dump, wrapper, cvc4_binary, benchmark_path): +def run_regression(proof, dump, wrapper, cvc4_binary, benchmark_path, timeout): """Determines the expected output for a benchmark, runs CVC4 on it and then checks whether the output corresponds to the expected output. Optionally uses a wrapper `wrapper`, tests proof generation (if proof is true), or @@ -241,7 +250,7 @@ def run_regression(proof, dump, wrapper, cvc4_binary, benchmark_path): for command_line_args in command_line_args_configs: output, error, exit_status = run_benchmark( dump, wrapper, scrubber, error_scrubber, cvc4_binary, - command_line_args, benchmark_dir, benchmark_basename) + command_line_args, benchmark_dir, benchmark_basename, timeout) if output != expected_output: print( 'not ok - Differences between expected and actual output on stdout - Flags: {}'. @@ -283,7 +292,10 @@ def main(): if os.environ.get('VALGRIND') == '1' and not wrapper: wrapper = ['libtool', '--mode=execute', 'valgrind'] - run_regression(args.proof, args.dump, wrapper, cvc4_binary, args.benchmark) + timeout = float(os.getenv('TEST_TIMEOUT', 600.0)) + + run_regression(args.proof, args.dump, wrapper, cvc4_binary, args.benchmark, + timeout) if __name__ == "__main__": -- 2.30.2