Improve analysis of racy testcases

author Sergio Durigan Junior <sergiodj@redhat.com>

Sun, 6 Mar 2016 01:37:11 +0000 (20:37 -0500)

committer Sergio Durigan Junior <sergiodj@redhat.com>

Sun, 6 Mar 2016 01:43:40 +0000 (20:43 -0500)
author Sergio Durigan Junior <sergiodj@redhat.com>
Sun, 6 Mar 2016 01:37:11 +0000 (20:37 -0500)
committer Sergio Durigan Junior <sergiodj@redhat.com>
Sun, 6 Mar 2016 01:43:40 +0000 (20:43 -0500)
diff --git a/gdb/testsuite/ChangeLog b/gdb/testsuite/ChangeLog

index 1811103d659602225478978bd2ac6c46f034bb69..a96156d7e436ba373978d53daef608d55fd9f466 100644 (file)
--- a/gdb/testsuite/ChangeLog
+++ b/gdb/testsuite/ChangeLog
@@ -1,3 +1,15 @@
+2016-03-05  Sergio Durigan Junior  <sergiodj@redhat.com>
+
+       * Makefile.in (DEFAULT_RACY_ITER): New variable.
+       (CHECK_TARGET_TMP): Likewise.
+       (check-single-racy): New rule.
+       (check-parallel-racy): Likewise.
+       (TEST_TARGETS): Adjust rule to account for RACY_ITER.
+       (do-check-parallel-racy): New rule.
+       (check-racy/%.exp): Likewise.
+       * README (Racy testcases): New section.
+       * analyze-racy-logs.py: New file.
+
  2016-03-03  Doug Evans  <dje@google.com>
  
         * gdb.base/skip.exp: Use with_test_prefix.
diff --git a/gdb/testsuite/Makefile.in b/gdb/testsuite/Makefile.in

index 38c30525ec920905d53298a42e61fa1f79df6e55..7853275617e6f632b120269ad9294db38dd6ac46 100644 (file)
--- a/gdb/testsuite/Makefile.in
+++ b/gdb/testsuite/Makefile.in
@@ -52,6 +52,12 @@ RUNTESTFLAGS =
  
  FORCE_PARALLEL =
  
+# Default number of iterations that we will use to run the testsuite
+# if the user does not specify the RACY_ITER environment variable
+# (e.g., when the user calls the make rule directly from the command
+# line).
+DEFAULT_RACY_ITER = 3
+
  RUNTEST_FOR_TARGET = `\
    if [ -f $${srcdir}/../../dejagnu/runtest ]; then \
      echo $${srcdir}/../../dejagnu/runtest; \
@@ -140,7 +146,8 @@ installcheck:
  # given.  If RUNTESTFLAGS is not empty, then by default the tests will
  # be serialized.  This can be overridden by setting FORCE_PARALLEL to
  # any non-empty value.  For a non-GNU make, do not parallelize.
-@GMAKE_TRUE@CHECK_TARGET = $(if $(FORCE_PARALLEL),check-parallel,$(if $(RUNTESTFLAGS),check-single,$(if $(saw_dash_j),check-parallel,check-single)))
+@GMAKE_TRUE@CHECK_TARGET_TMP = $(if $(FORCE_PARALLEL),check-parallel,$(if $(RUNTESTFLAGS),check-single,$(if $(saw_dash_j),check-parallel,check-single)))
+@GMAKE_TRUE@CHECK_TARGET = $(if $(RACY_ITER),$(addsuffix -racy,$(CHECK_TARGET_TMP)),$(CHECK_TARGET_TMP))
  @GMAKE_FALSE@CHECK_TARGET = check-single
  
  # Note that we must resort to a recursive make invocation here,
@@ -190,6 +197,26 @@ DO_RUNTEST = \
  check-single:
         $(DO_RUNTEST) $(RUNTESTFLAGS) $(expanded_tests_or_none)
  
+check-single-racy:
+       -rm -rf cache racy_outputs temp
+       mkdir -p racy_outputs; \
+       racyiter="$(RACY_ITER)"; \
+       test "x$$racyiter" == "x" && \
+         racyiter=$(DEFAULT_RACY_ITER); \
+       if test $$racyiter -lt 2 ; then \
+         echo "RACY_ITER must be at least 2."; \
+         exit 1; \
+       fi; \
+       trap "exit" INT; \
+       for n in `seq $$racyiter` ; do \
+         mkdir -p racy_outputs/$$n; \
+         $(DO_RUNTEST) --outdir=racy_outputs/$$n $(RUNTESTFLAGS) \
+           $(expanded_tests_or_none); \
+       done; \
+       $(srcdir)/analyze-racy-logs.py \
+         `ls racy_outputs/*/gdb.sum` > racy.sum; \
+       sed -n '/=== gdb Summary ===/,$$ p' racy.sum
+
  check-parallel:
         -rm -rf cache outputs temp
         $(MAKE) -k do-check-parallel; \
@@ -201,6 +228,31 @@ check-parallel:
         sed -n '/=== gdb Summary ===/,$$ p' gdb.sum; \
         exit $$result
  
+check-parallel-racy:
+       -rm -rf cache racy_outputs temp
+       racyiter="$(RACY_ITER)"; \
+       test "x$$racyiter" == "x" && \
+         racyiter=$(DEFAULT_RACY_ITER); \
+       if test $$racyiter -lt 2 ; then \
+         echo "RACY_ITER must be at least 2."; \
+         exit 1; \
+       fi; \
+       trap "exit" INT; \
+       for n in `seq $$racyiter` ; do \
+         $(MAKE) -k do-check-parallel-racy \
+           RACY_OUTPUT_N=$$n; \
+         $(SHELL) $(srcdir)/dg-extract-results.sh \
+           `find racy_outputs/$$n -name gdb.sum -print` > \
+           racy_outputs/$$n/gdb.sum; \
+         $(SHELL) $(srcdir)/dg-extract-results.sh -L \
+           `find racy_outputs/$$n -name gdb.log -print` > \
+           racy_outputs/$$n/gdb.log; \
+         sed -n '/=== gdb Summary ===/,$$ p' racy_outputs/$$n/gdb.sum; \
+       done; \
+       $(srcdir)/analyze-racy-logs.py \
+         `ls racy_outputs/*/gdb.sum` > racy.sum; \
+       sed -n '/=== gdb Summary ===/,$$ p' racy.sum
+
  # Turn a list of .exp files into "check/" targets.  Only examine .exp
  # files appearing in a gdb.* directory -- we don't want to pick up
  # lib/ by mistake.  For example, gdb.linespec/linespec.exp becomes
@@ -214,9 +266,9 @@ slow_tests = gdb.base/break-interp.exp gdb.base/interp.exp \
         gdb.base/multi-forks.exp
  @GMAKE_TRUE@all_tests := $(shell cd $(srcdir) && find gdb.* -name '*.exp' -print)
  @GMAKE_TRUE@reordered_tests := $(slow_tests) $(filter-out $(slow_tests),$(all_tests))
-@GMAKE_TRUE@TEST_TARGETS := $(addprefix check/,$(reordered_tests))
+@GMAKE_TRUE@TEST_TARGETS := $(addprefix $(if $(RACY_ITER),check-racy,check)/,$(reordered_tests))
  @GMAKE_TRUE@else
-@GMAKE_TRUE@TEST_TARGETS := $(addprefix check/,$(expanded_tests_or_none))
+@GMAKE_TRUE@TEST_TARGETS := $(addprefix $(if $(RACY_ITER),check-racy,check)/,$(expanded_tests_or_none))
  @GMAKE_TRUE@endif
  
  do-check-parallel: $(TEST_TARGETS)
@@ -226,6 +278,15 @@ do-check-parallel: $(TEST_TARGETS)
  @GMAKE_TRUE@   -mkdir -p outputs/$*
  @GMAKE_TRUE@   @$(DO_RUNTEST) GDB_PARALLEL=yes --outdir=outputs/$* $*.exp $(RUNTESTFLAGS)
  
+do-check-parallel-racy: $(TEST_TARGETS)
+       @:
+
+@GMAKE_TRUE@check-racy/%.exp:
+@GMAKE_TRUE@   -mkdir -p racy_outputs/$(RACY_OUTPUT_N)/$*
+@GMAKE_TRUE@   $(DO_RUNTEST) GDB_PARALLEL=yes \
+@GMAKE_TRUE@     --outdir=racy_outputs/$(RACY_OUTPUT_N)/$* $*.exp \
+@GMAKE_TRUE@     $(RUNTESTFLAGS)
+
  check/no-matching-tests-found:
         @echo ""
         @echo "No matching tests found."
diff --git a/gdb/testsuite/README b/gdb/testsuite/README

index 6b59027731520949acb28ce7ae4ee1b417553515..043a8bd6b6c106302645721e7731898ee76647b2 100644 (file)
--- a/gdb/testsuite/README
+++ b/gdb/testsuite/README
@@ -50,6 +50,51 @@ to any non-empty value:
  If you want to use runtest directly instead of using the Makefile, see
  the description of GDB_PARALLEL below.
  
+Racy testcases
+**************
+
+Sometimes, new testcases are added to the testsuite that are not
+entirely deterministic, and can randomly pass or fail.  We call them
+"racy testcases", and they can be bothersome when one is comparing
+different testsuite runs.  In order to help identifying them, it is
+possible to run the tests several times in a row and ask the testsuite
+machinery to analyze the results.  To do that, you need to specify the
+RACY_ITER environment variable to make:
+
+       make check RACY_ITER=5 -j4
+
+The value assigned to RACY_ITER represents the number of times you
+wish to run the tests in sequence (in the example above, the entire
+testsuite will be executed 5 times in a row, in parallel).  It is also
+possible to check just a specific test:
+
+       make check TESTS='gdb.base/default.exp' RACY_ITER=3
+
+One can also decide to call the Makefile rules by hand inside the
+gdb/testsuite directory, e.g.:
+
+       make check-paralell-racy -j4
+
+In which case the value of the DEFAULT_RACY_ITER variable (inside
+gdb/testsuite/Makefile.in) will be used to determine how many
+iterations will be run.
+
+After running the tests, you shall see a file name 'racy.sum' in the
+gdb/testsuite directory.  You can also inspect the generated *.log and
+*.sum files by looking into the gdb/testsuite/racy_ouputs directory.
+
+If you already have *.sum files generated from previous testsuite runs
+and you would like to analyze them without having to run the testsuite
+again, you can also use the 'analyze-racy-logs.py' script directly.
+It is located in the gdb/testsuite/ directory, and it expects a list
+of two or more *.sum files to be provided as its argument.  For
+example:
+
+       ./gdb/testsuite/analyze-racy-logs.py testsuite-01/gdb.sum \
+         testsuite-02/gdb.sum testsuite-03/gdb.sum
+
+The script will output its analysis report to the standard output.
+
  Running the Performance Tests
  *****************************
  
diff --git a/gdb/testsuite/analyze-racy-logs.py b/gdb/testsuite/analyze-racy-logs.py

new file mode 100644 (file)

index 0000000..06dbc3b
--- /dev/null
+++ b/gdb/testsuite/analyze-racy-logs.py
@@ -0,0 +1,177 @@
+#!/usr/bin/env python
+
+# Copyright (C) 2016 Free Software Foundation, Inc.
+#
+# This file is part of GDB.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+
+# This program is used to analyze the test results (i.e., *.sum files)
+# generated by GDB's testsuite, and print the testcases that are found
+# to be racy.
+#
+# Racy testcases are considered as being testcases which can
+# intermittently FAIL (or PASS) when run two or more times
+# consecutively, i.e., tests whose results are not deterministic.
+#
+# This program is invoked when the user runs "make check" and
+# specifies the RACY_ITER environment variable.
+
+import sys
+import os
+import re
+
+# The (global) dictionary that stores the associations between a *.sum
+# file and its results.  The data inside it will be stored as:
+#
+# files_and_tests = { 'file1.sum' : { 'PASS' : { 'test1', 'test2' ... },
+#                                     'FAIL' : { 'test5', 'test6' ... },
+#                                     ...
+#                                   },
+#                   { 'file2.sum' : { 'PASS' : { 'test1', 'test3' ... },
+#                                   ...
+#                                   }
+#                   }
+
+files_and_tests = dict ()
+
+# The relatioships between various states of the same tests that
+# should be ignored.  For example, if the same test PASSes on a
+# testcase run but KFAILs on another, this test should be considered
+# racy because a known-failure is...  known.
+
+ignore_relations = { 'PASS' : 'KFAIL' }
+
+# We are interested in lines that start with '.?(PASS|FAIL)'.  In
+# other words, we don't process errors (maybe we should).
+
+sum_matcher = re.compile('^(.?(PASS|FAIL)): (.*)$')
+
+def parse_sum_line (line, dic):
+    """Parse a single LINE from a sumfile, and store the results in the
+dictionary referenced by DIC."""
+    global sum_matcher
+
+    line = line.rstrip ()
+    m = re.match (sum_matcher, line)
+    if m:
+        result = m.group (1)
+        test_name = m.group (3)
+        # Remove tail parentheses.  These are likely to be '(timeout)'
+        # and other extra information that will only confuse us.
+        test_name = re.sub ('(\s+)?\(.*$', '', test_name)
+        if result not in dic.keys ():
+            dic[result] = set ()
+        if test_name in dic[result]:
+            # If the line is already present in the dictionary, then
+            # we include a unique identifier in the end of it, in the
+            # form or '<<N>>' (where N is a number >= 2).  This is
+            # useful because the GDB testsuite is full of non-unique
+            # test messages; however, if you process the racy summary
+            # file you will also need to perform this same operation
+            # in order to identify the racy test.
+            i = 2
+            while True:
+                nname = test_name + ' <<' + str (i) + '>>'
+                if nname not in dic[result]:
+                    break
+                i += 1
+            test_name = nname
+        dic[result].add (test_name)
+
+def read_sum_files (files):
+    """Read the sumfiles (passed as a list in the FILES variable), and
+process each one, filling the FILES_AND_TESTS global dictionary with
+information about them. """
+    global files_and_tests
+
+    for x in files:
+        with open (x, 'r') as f:
+            files_and_tests[x] = dict ()
+            for line in f.readlines ():
+                parse_sum_line (line, files_and_tests[x])
+
+def identify_racy_tests ():
+    """Identify and print the racy tests.  This function basically works
+on sets, and the idea behind it is simple.  It takes all the sets that
+refer to the same result (for example, all the sets that contain PASS
+tests), and compare them.  If a test is present in all PASS sets, then
+it is not racy.  Otherwise, it is.
+
+This function does that for all sets (PASS, FAIL, KPASS, KFAIL, etc.),
+and then print a sorted list (without duplicates) of all the tests
+that were found to be racy."""
+    global files_and_tests
+
+    # First, construct two dictionaries that will hold one set of
+    # testcases for each state (PASS, FAIL, etc.).
+    #
+    # Each set in NONRACY_TESTS will contain only the non-racy
+    # testcases for that state.  A non-racy testcase is a testcase
+    # that has the same state in all test runs.
+    #
+    # Each set in ALL_TESTS will contain all tests, racy or not, for
+    # that state.
+    nonracy_tests = dict ()
+    all_tests = dict ()
+    for f in files_and_tests:
+        for state in files_and_tests[f]:
+            try:
+                nonracy_tests[state] &= files_and_tests[f][state].copy ()
+            except KeyError:
+                nonracy_tests[state] = files_and_tests[f][state].copy ()
+
+            try:
+                all_tests[state] |= files_and_tests[f][state].copy ()
+            except KeyError:
+                all_tests[state] = files_and_tests[f][state].copy ()
+
+    # Now, we eliminate the tests that are present in states that need
+    # to be ignored.  For example, tests both in the PASS and KFAIL
+    # states should not be considered racy.
+    ignored_tests = set ()
+    for s1, s2 in ignore_relations.iteritems ():
+        try:
+            ignored_tests |= (all_tests[s1] & all_tests[s2])
+        except:
+            continue
+
+    racy_tests = set ()
+    for f in files_and_tests:
+        for state in files_and_tests[f]:
+            racy_tests |= files_and_tests[f][state] - nonracy_tests[state]
+
+    racy_tests = racy_tests - ignored_tests
+
+    # Print the header.
+    print "\t\t=== gdb racy tests ===\n"
+
+    # Print each test.
+    for line in sorted (racy_tests):
+        print line
+
+    # Print the summary.
+    print "\n"
+    print "\t\t=== gdb Summary ===\n"
+    print "# of racy tests:\t\t%d" % len (racy_tests)
+
+if __name__ == '__main__':
+    if len (sys.argv) < 3:
+        # It only makes sense to invoke this program if you pass two
+        # or more files to be analyzed.
+        sys.exit ("Usage: %s [FILE] [FILE] ..." % sys.argv[0])
+    read_sum_files (sys.argv[1:])
+    identify_racy_tests ()
+    exit (0)
author	Sergio Durigan Junior <sergiodj@redhat.com>
	Sun, 6 Mar 2016 01:37:11 +0000 (20:37 -0500)
committer	Sergio Durigan Junior <sergiodj@redhat.com>
	Sun, 6 Mar 2016 01:43:40 +0000 (20:43 -0500)
gdb/testsuite/ChangeLog		patch \| blob \| history
gdb/testsuite/Makefile.in		patch \| blob \| history
gdb/testsuite/README		patch \| blob \| history
gdb/testsuite/analyze-racy-logs.py	[new file with mode: 0644]	patch \| blob