gitlab-ci/deqp: detect and report flakes
author Rob Clark <robdclark@chromium.org>
Sun, 17 Nov 2019 19:33:01 +0000 (11:33 -0800)
committer Rob Clark <robdclark@chromium.org>
Fri, 22 Nov 2019 21:48:29 +0000 (13:48 -0800)
If there are a small number of failures, re-run them to determine whether
they are flakes, and optionally (if `$FLAKES_CHANNEL` is configured) report
the flakes.

This way flakes don't interfere with developers working on other
drivers, but get logged so that the developers working on the flaking
driver can monitor the situation.

Signed-off-by: Rob Clark <robdclark@chromium.org>
Acked-by: Eric Engestrom <eric.engestrom@intel.com>
.gitlab-ci.yml
.gitlab-ci/container/arm_test.sh
.gitlab-ci/deqp-runner.sh

index e074f0ac58760e2c54935e3a432d1a4dd9e31a25..6ba2cd5e32755f713f1cf57e2a9fc70a04bb0a1f 100644 (file)
@@ -461,7 +461,7 @@ scons-old-llvm:
     - tar -xf artifacts/install.tar
     - LD_LIBRARY_PATH=install/lib find install/lib -name "*.so" -print -exec ldd {} \;
   artifacts:
-    when: on_failure
+    when: always
     name: "$CI_JOB_NAME-$CI_COMMIT_REF_NAME"
     paths:
       - results/
@@ -555,6 +555,7 @@ arm64_a630_gles2:
     DEQP_SKIPS: deqp-freedreno-a630-skips.txt
     NIR_VALIDATE: 0
     DEQP_PARALLEL: 4
+    FLAKES_CHANNEL: "#freedreno-ci"
   tags:
     - mesa-cheza
   dependencies:
index 2ea6327cb84e731771d2b68e14746f2efd2dd7c7..16dc0ddca9a8745027f115625e971bd477a95b48 100644 (file)
@@ -26,6 +26,7 @@ apt-get -y install \
        libvulkan-dev \
        libvulkan1 \
        meson \
+       netcat \
        pkg-config \
        procps \
        python \
index 58aa7759b7ab8f74c352893d8c5931622a70f72d..989a222967e8cfbee5074cd72bf6b17f8b3261c3 100755 (executable)
@@ -57,15 +57,56 @@ fi
 
 set +e
 
-vulkan-cts-runner \
-    --deqp /deqp/modules/$DEQP_VER/deqp-$DEQP_VER \
-    --output $RESULTS/cts-runner-results.txt \
-    --caselist /tmp/case-list.txt \
-    --exclude-list $ARTIFACTS/$DEQP_SKIPS \
-    $XFAIL \
-    --job ${DEQP_PARALLEL:-1} \
-    -- \
-    "${DEQP_OPTIONS[@]}"
+# Run a dEQP case list through deqp-runner.
+#   $1 - path of the case list to execute
+#   $2 - path the runner writes its results to
+# Reads DEQP_VER, ARTIFACTS, DEQP_SKIPS, XFAIL, DEQP_PARALLEL (default 1)
+# and the DEQP_OPTIONS array.  Returns deqp-runner's exit status.
+#
+# NOTE(review): $XFAIL is left unquoted as in the original, presumably so
+# that an empty value contributes no argument at all -- confirm where it
+# is set before quoting it.
+run_cts() {
+    local caselist=$1
+    local output=$2
+    deqp-runner \
+        --deqp "/deqp/modules/$DEQP_VER/deqp-$DEQP_VER" \
+        --output "$output" \
+        --caselist "$caselist" \
+        --exclude-list "$ARTIFACTS/$DEQP_SKIPS" \
+        $XFAIL \
+        --job "${DEQP_PARALLEL:-1}" \
+        --allow-flakes true \
+        -- \
+        "${DEQP_OPTIONS[@]}"
+}
+
+# Announce flaky tests on IRC so that the people working on the flaking
+# driver can monitor them.  Does nothing unless $FLAKES_CHANNEL is set.
+#   $1 - file listing the flakes, one per line
+# Reads CI_RUNNER_DESCRIPTION, CI_PIPELINE_ID, CI_JOB_URL and the
+# CI_MERGE_REQUEST_* variables from the environment.
+# Returns 0 when no channel is configured, otherwise the status of the
+# nc pipeline.
+report_flakes() {
+    if [ -z "$FLAKES_CHANNEL" ]; then
+        return 0
+    fi
+    local flakes=$1
+    local bot="$CI_RUNNER_DESCRIPTION-$CI_PIPELINE_ID"
+    local channel="$FLAKES_CHANNEL"
+    # Everything written by the subshell is piped to the IRC server.
+    (
+    echo "NICK $bot"
+    echo "USER $bot unused unused :Gitlab CI Notifier"
+    # Presumably gives the server time to finish registration before we
+    # join, and to process the join before we speak -- TODO confirm.
+    sleep 10
+    echo "JOIN $channel"
+    sleep 1
+    desc="Flakes detected in job: $CI_JOB_URL on $CI_RUNNER_DESCRIPTION"
+    # Only mention the branch when the variable is actually set.
+    if [ -n "$CI_MERGE_REQUEST_SOURCE_BRANCH_NAME" ]; then
+        desc="$desc on branch $CI_MERGE_REQUEST_SOURCE_BRANCH_NAME ($CI_MERGE_REQUEST_TITLE)"
+    fi
+    echo "PRIVMSG $channel :$desc"
+    # Read line by line so entries are neither word-split nor glob-expanded;
+    # the extra test keeps a final line that lacks a trailing newline.
+    while IFS= read -r flake || [ -n "$flake" ]; do
+        # Skip blank lines rather than sending empty messages.
+        [ -n "$flake" ] || continue
+        echo "PRIVMSG $channel :$flake"
+    done < "$flakes"
+    echo "PRIVMSG $channel :See $CI_JOB_URL/artifacts/browse/results/"
+    echo "QUIT"
+    ) | nc irc.freenode.net 6667 > /dev/null
+}
+
+# wrapper to suppress +x to avoid spamming the log
+#
+# Runs the command given as arguments ("$@") with xtrace turned off, then
+# turns xtrace back on unconditionally, whatever its state was before the
+# call.  The wrapper's own exit status is that of the final `set -x`, not
+# of the wrapped command.
+quiet() {
+    set +x
+    "$@"
+    set -x
+}
+
+run_cts /tmp/case-list.txt $RESULTS/cts-runner-results.txt
 DEQP_EXITCODE=$?
 
 if [ $DEQP_EXITCODE -ne 0 ]; then
@@ -78,6 +119,28 @@ if [ $DEQP_EXITCODE -ne 0 ]; then
         grep -v ",ExpectedFail" > \
         $RESULTS/cts-runner-unexpected-results.txt
     head -n 50 $RESULTS/cts-runner-unexpected-results.txt
+
+    count=`cat $RESULTS/cts-runner-unexpected-results.txt | wc -l`
+
+    # Re-run fails to detect flakes.  But use a small threshold, if
+    # something was fundamentally broken, we don't want to re-run
+    # the entire caselist
+else
+    cat $RESULTS/cts-runner-results.txt | \
+        grep ",Flake" > \
+        $RESULTS/cts-runner-flakes.txt
+
+    count=`cat $RESULTS/cts-runner-flakes.txt | wc -l`
+    if [ $count -gt 0 ]; then
+        echo "Some flakes found (see cts-runner-flakes.txt in artifacts for full results):"
+        head -n 50 $RESULTS/cts-runner-flakes.txt
+
+        # Report the flakes to IRC channel for monitoring (if configured):
+        quiet report_flakes $RESULTS/cts-runner-flakes.txt
+    else
+        # no flakes, so clean-up:
+        rm $RESULTS/cts-runner-flakes.txt
+    fi
 fi
 
 exit $DEQP_EXITCODE