ci: Use cts_runner for our dEQP runs.
author Eric Anholt <eric@anholt.net>
Mon, 4 Nov 2019 18:54:41 +0000 (10:54 -0800)
committer Eric Anholt <eric@anholt.net>
Tue, 12 Nov 2019 20:54:04 +0000 (12:54 -0800)
This runner is a little project by Bas, written in C++, that spawns
threads that then loop grabbing chunks of the (randomly shuffled but
consistently so) test list and hand it to a dEQP instance.  As the
remaining list gets shorter, so do the chunks, so hopefully the
threads all complete effectively at once.  It also handles restarting
after crashes automatically.  I've extended the runner a bit to do
what I was doing in the bash scripts before, like the skip list and
expected failures handling.  This project should also be a good
baseline for extending to handle retesting of intermittent failures.

By switching to it, we can have the swrast tests just take up one job
slot on the shared runners and keep their allotment of CPUs busy,
instead of taking up job slots with single-threaded dEQP jobs.  It
will also let us (eventually, once I reprovision) switch the freedreno
runners over to threading within the job instead of running concurrent
jobs, so that memory scribbles in one pipeline don't affect unrelated
pipelines, and I can experiment with their parallelism (particularly
on a306 where we are frequently backed up) without trashing other
people's jobs.

What we lose in this process is per-test output in the log (not a big
loss, I think, since we summarize fails at the end and reducing log
length keeps chrome from choking on our logs so badly).  We also drop
the renderer sanity checking, since it's not saving qpa files for us
to go poke through.  Given that all the drivers involved have fail
lists, if we got the wrong renderer somehow, we'd get a job failure
anyway.

v2: Rebase on dropping of the autoscale cluster and the arm64
    build/test split.  Use a script to deduplicate the cts-runner
    build.
v3: Rebase on the amd64 build/test container split.

Acked-by: Daniel Stone <daniels@collabora.com> (v1)
Reviewed-by: Tomeu Vizoso <tomeu.vizoso@collabora.com> (v2)
.gitlab-ci.yml
.gitlab-ci/build-cts-runner.sh [new file with mode: 0644]
.gitlab-ci/debian-arm64-test-install.sh
.gitlab-ci/debian-test-install.sh
.gitlab-ci/deqp-freedreno-a630-skips.txt
.gitlab-ci/deqp-runner.sh

index bc196134449ea7d5bb2376d5ac09241a3c228c14..83f3346a1c6afae0269c9a676e4a28e1e7259d16 100644 (file)
@@ -15,9 +15,9 @@
 variables:
   UPSTREAM_REPO: mesa/mesa
   DEBIAN_TAG: "amd64-2019-11-13-2"
-  DEBIAN_TEST_TAG: "amd64-test-2019-11-12"
+  DEBIAN_TEST_TAG: "amd64-test-2019-11-12-2"
   DEBIAN_ARM64_TAG: "arm64v8-2019-11-06"
-  DEBIAN_ARM64_TEST_TAG: "arm64v8-test-2019-11-12"
+  DEBIAN_ARM64_TEST_TAG: "arm64v8-test-2019-11-12-2"
   STRETCH_TAG: "2019-09-18"
   DEBIAN_VERSION: buster-slim
   STRETCH_VERSION: stretch-slim
@@ -520,19 +520,21 @@ piglit-glslparser+quick_shader:
     - ./artifacts/deqp-runner.sh
 
 test-llvmpipe-gles2:
-  parallel: 4
   variables:
     DEQP_VER: gles2
+    DEQP_PARALLEL: 4
+    # Don't use threads inside llvmpipe, we've already got all 4 cores
+    # busy with DEQP_PARALLEL.
+    LP_NUM_THREADS: 0
     DEQP_EXPECTED_FAILS: deqp-llvmpipe-fails.txt
     LIBGL_ALWAYS_SOFTWARE: "true"
-    DEQP_RENDERER_MATCH: "llvmpipe"
   extends: .deqp-test
 
 test-softpipe-gles2:
   extends: test-llvmpipe-gles2
   variables:
     DEQP_EXPECTED_FAILS: deqp-softpipe-fails.txt
-    DEQP_RENDERER_MATCH: "softpipe"
+    DEQP_SKIPS: deqp-softpipe-skips.txt
     GALLIUM_DRIVER: "softpipe"
 
 # The GLES2 CTS run takes about 8 minutes of CPU time, while GLES3 is
@@ -541,9 +543,9 @@ test-softpipe-gles2:
 test-softpipe-gles3-limited:
   variables:
     DEQP_VER: gles3
+    DEQP_PARALLEL: 4
     DEQP_EXPECTED_FAILS: deqp-softpipe-fails.txt
     LIBGL_ALWAYS_SOFTWARE: "true"
-    DEQP_RENDERER_MATCH: "softpipe"
     GALLIUM_DRIVER: "softpipe"
     CI_NODE_INDEX: 1
     CI_NODE_TOTAL: 10
@@ -554,7 +556,6 @@ arm64_a630_gles2:
   image: $DEBIAN_ARM64_TEST_IMAGE
   variables:
     DEQP_VER: gles2
-    DEQP_RENDERER_MATCH: "FD630"
     DEQP_EXPECTED_FAILS: deqp-freedreno-a630-fails.txt
     DEQP_SKIPS: deqp-freedreno-a630-skips.txt
     NIR_VALIDATE: 0
@@ -584,6 +585,5 @@ arm64_a306_gles2:
   variables:
     DEQP_EXPECTED_FAILS: deqp-freedreno-a307-fails.txt
     DEQP_SKIPS: deqp-default-skips.txt
-    DEQP_RENDERER_MATCH: "FD307"
   tags:
     - db410c
diff --git a/.gitlab-ci/build-cts-runner.sh b/.gitlab-ci/build-cts-runner.sh
new file mode 100644 (file)
index 0000000..2622819
--- /dev/null
@@ -0,0 +1,10 @@
+#!/bin/bash
+
+set -ex
+
+git clone https://github.com/anholt/cts_runner.git --depth 1 -b anholt-mesa-ci-2
+cd cts_runner
+meson build/
+ninja -C build -j4 install
+cd ..
+rm -rf cts_runner
index b59d6646ebb4ea24c3ac69e9b4e22e04d439a925..2ea6327cb84e731771d2b68e14746f2efd2dd7c7 100644 (file)
@@ -23,13 +23,20 @@ apt-get -y install \
        libllvm8 \
        libpng16-16 \
        libpng-dev \
+       libvulkan-dev \
+       libvulkan1 \
        meson \
        pkg-config \
        procps \
        python \
+       waffle-utils \
        wget \
        zlib1g
 
+############### Build dEQP runner
+
+. .gitlab-ci/build-cts-runner.sh
+
 ############### Build dEQP
 
 . .gitlab-ci/build-deqp.sh
@@ -47,6 +54,7 @@ apt-get purge -y \
         libgbm-dev \
         libgles2-mesa-dev \
         libpng-dev \
+        libvulkan-dev \
         meson \
         pkg-config \
         python \
index 4af73993519d2cb5b07a6c7fd0c738c207e08226..59dcbbd064b30e436a450bc1e6879d34eaaf0045 100644 (file)
@@ -32,6 +32,7 @@ apt-get install -y --no-remove \
       libpng16-16 \
       libpng-dev \
       libvulkan1 \
+      libvulkan-dev \
       libwaffle-dev \
       libwayland-server0 \
       libxcb-xfixes0 \
@@ -65,6 +66,10 @@ rm -rf target_api
 popd
 
 
+############### Build dEQP runner
+
+. .gitlab-ci/build-cts-runner.sh
+
 ############### Build dEQP
 
 . .gitlab-ci/build-deqp.sh
index 65d340c368767fb982eef04b0ea30dff98a70ae1..477f4bd06e0a776525d4e80fbfb155d87bd57318 100644 (file)
@@ -27,3 +27,6 @@ dEQP-GLES3.functional.texture.specification.texsubimage2d_pbo.r16ui_2d
 # Layered rendering is sysmem only and needs working clears
 dEQP-GLES31.functional.geometry_shading.layered.*
 dEQP-GLES31.functional.geometry_shading.instanced.*layer.*
+
+# Intermittent timeout
+dEQP-GLES31.functional.ssbo.layout.random.all_shared_buffer.23
index 822665ca66ef26f25d345291c0fb75d08243383e..2e93a643a36ffc6fabba01aff21f370bc75fcac8 100755 (executable)
@@ -6,8 +6,6 @@ DEQP_OPTIONS=(--deqp-surface-width=256 --deqp-surface-height=256)
 DEQP_OPTIONS+=(--deqp-surface-type=pbuffer)
 DEQP_OPTIONS+=(--deqp-gl-config-name=rgba8888d24s8ms0)
 DEQP_OPTIONS+=(--deqp-visibility=hidden)
-DEQP_OPTIONS+=(--deqp-log-images=disable)
-DEQP_OPTIONS+=(--deqp-crashhandler=enable)
 
 # It would be nice to be able to enable the watchdog, so that hangs in a test
 # don't need to wait the full hour for the run to time out.  However, some
@@ -26,20 +24,7 @@ if [ -z "$DEQP_SKIPS" ]; then
    exit 1
 fi
 
-# Prep the expected failure list
-if [ -n "$DEQP_EXPECTED_FAILS" ]; then
-   export DEQP_EXPECTED_FAILS=`pwd`/artifacts/$DEQP_EXPECTED_FAILS
-else
-   export DEQP_EXPECTED_FAILS=/tmp/expect-no-failures.txt
-   touch $DEQP_EXPECTED_FAILS
-fi
-sort < $DEQP_EXPECTED_FAILS > /tmp/expected-fails.txt
-
-# Fix relative paths on inputs.
-export DEQP_SKIPS=`pwd`/artifacts/$DEQP_SKIPS
-
-# Be a good citizen on the shared runners.
-export LP_NUM_THREADS=4
+ARTIFACTS=`pwd`/artifacts
 
 # Set up the driver environment.
 export LD_LIBRARY_PATH=`pwd`/install/lib/
@@ -52,19 +37,9 @@ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib
 RESULTS=`pwd`/results
 mkdir -p $RESULTS
 
-cd /deqp/modules/$DEQP_VER
-
 # Generate test case list file
 cp /deqp/mustpass/$DEQP_VER-master.txt /tmp/case-list.txt
 
-# Note: not using sorted input and comm, becuase I want to run the tests in
-# the same order that dEQP would.
-while read -r line; do
-   if echo "$line" | grep -q '^[^#]'; then
-       sed -i "/$line/d" /tmp/case-list.txt
-   fi
-done < $DEQP_SKIPS
-
 # If the job is parallel, take the corresponding fraction of the caselist.
 # Note: N~M is a gnu sed extension to match every nth line (first line is #1).
 if [ -n "$CI_NODE_INDEX" ]; then
@@ -76,61 +51,29 @@ if [ ! -s /tmp/case-list.txt ]; then
     exit 1
 fi
 
-# Cannot use tee because dash doesn't have pipefail
-touch /tmp/result.txt
-tail -f /tmp/result.txt &
-
-./deqp-$DEQP_VER "${DEQP_OPTIONS[@]}" --deqp-log-filename=$RESULTS/results.qpa --deqp-caselist-file=/tmp/case-list.txt >> /tmp/result.txt
-DEQP_EXITCODE=$?
-
-sed -ne \
-    '/StatusCode="Fail"/{x;p}; s/#beginTestCaseResult //; T; h' \
-    $RESULTS/results.qpa \
-    > /tmp/unsorted-fails.txt
-
-# Scrape out the renderer that the test run used, so we can validate that the
-# right driver was used.
-if grep -q "dEQP-.*.info.renderer" /tmp/case-list.txt; then
-    # This is an ugly dependency on the .qpa format: Print 3 lines after the
-    # match, which happens to contain the result.
-    RENDERER=`sed -n '/#beginTestCaseResult dEQP-.*.info.renderer/{n;n;n;p}' $RESULTS/results.qpa | sed -n -E "s|<Text>(.*)</Text>|\1|p"`
-
-    echo "GL_RENDERER for this test run: $RENDERER"
-
-    if [ -n "$DEQP_RENDERER_MATCH" ]; then
-        echo $RENDERER | grep -q $DEQP_RENDERER_MATCH > /dev/null
-    fi
+if [ -n "$DEQP_EXPECTED_FAILS" ]; then
+    XFAIL="--xfail-list $ARTIFACTS/$DEQP_EXPECTED_FAILS"
 fi
 
-if grep -q "dEQP-.*.info.version" /tmp/case-list.txt; then
-    # This is an ugly dependency on the .qpa format: Print 3 lines after the
-    # match, which happens to contain the result.
-    VERSION=`sed -n '/#beginTestCaseResult dEQP-.*.info.version/{n;n;n;p}' $RESULTS/results.qpa | sed -n -E "s|<Text>(.*)</Text>|\1|p"`
-    echo "Driver version tested: $VERSION"
-fi
+set +e
+
+vulkan-cts-runner \
+    --deqp /deqp/modules/$DEQP_VER/deqp-$DEQP_VER \
+    --output $RESULTS/cts-runner-results.txt \
+    --caselist /tmp/case-list.txt \
+    --exclude-list $ARTIFACTS/$DEQP_SKIPS \
+    $XFAIL \
+    --job ${DEQP_PARALLEL:-1} \
+    -- \
+    "${DEQP_OPTIONS[@]}"
+DEQP_EXITCODE=$?
 
 if [ $DEQP_EXITCODE -ne 0 ]; then
-   exit $DEQP_EXITCODE
-fi
-
-sort < /tmp/unsorted-fails.txt > $RESULTS/fails.txt
-
-comm -23 $RESULTS/fails.txt /tmp/expected-fails.txt > /tmp/new-fails.txt
-if [ -s /tmp/new-fails.txt ]; then
-    echo "Unexpected failures:"
-    cat /tmp/new-fails.txt
-    exit 1
-else
-    echo "No new failures"
-fi
-
-sort /tmp/case-list.txt > /tmp/sorted-case-list.txt
-comm -12 /tmp/sorted-case-list.txt /tmp/expected-fails.txt > /tmp/expected-fails-in-caselist.txt
-comm -13 $RESULTS/fails.txt /tmp/expected-fails-in-caselist.txt > /tmp/new-passes.txt
-if [ -s /tmp/new-passes.txt ]; then
-    echo "Unexpected passes, please update $DEQP_EXPECTED_FAILS (or add flaky tests to $DEQP_SKIPS):"
-    cat /tmp/new-passes.txt
-    exit 1
-else
-    echo "No new passes"
+    echo "Some unexpected results found (see cts-runner-results.txt in artifacts for full results):"
+    cat $RESULTS/cts-runner-results.txt | \
+        grep -v ",Pass" | \
+        grep -v ",Skip" | \
+        grep -v ",ExpectedFail" | \
+        head -n 50
+    exit $DEQP_EXITCODE
 fi