From 0b6e03b8481ffd332946302305ed6535803bc55a Mon Sep 17 00:00:00 2001
From: Benjamin Tissoires
Date: Thu, 11 Jun 2020 17:16:28 +0200
Subject: [PATCH] CI: reduce bandwidth for git pull
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

Over the last 7 days, git pulls represented a total of 1.7 TB.

Of those 1.7 TB, we can see:
- ~300 GB for the CI farm on hetzner
- ~730 GB for the CI farm on packet.net
- ~680 GB for the rest of the world

We cannot really change the rest of the world*, but we can certainly
reduce the egress costs towards our CI farms.

Right now, the gitlab runners are not doing a good job at
caching the git trees for the various jobs we make, and
we end up with a lot of cache misses. A typical pipeline
ends up with a good 2.8GB of git pull data (a compressed
archive of the mesa folder accounts for 280MB).

In this patch, we implement what was suggested in
https://gitlab.com/gitlab-org/gitlab/-/issues/215591#note_334642576

- we host a brand new MinIO server on packet
- jobs can upload files to 2 locations:
  * git-cache/<namespace>/<project>/<project>.tar.gz
  * artifacts/<namespace>/<project>/<pipeline-id>/
- the authorization is handled by gitlab with short-lived tokens
  valid only while the job is running
- whenever a job runs, the runners are configured to execute
  (eval) $CI_PRE_CLONE_SCRIPT
- this variable is set globally to download the current cache from
  the MinIO packet server, unpack it and replace the possibly
  out-of-date cache found on the runner
- then git fetch is run by the runner, and only the delta between
  the upstream tree and the local tree gets pulled.

We can rebuild the git cache in a scheduled job (once a day seems
sufficient), and then we can avoid cache misses entirely.

First results showed that instead of pulling 280MB of data
in my fork, I got a pull of only 250KB. That should help us.

* arguably, there are other farms in the rest of the world, so
hopefully we can change those too.
Reviewed-by: Michel Dänzer
Reviewed-by: Peter Hutterer
Signed-off-by: Benjamin Tissoires
Part-of:
---
Notes on this revision:
 - CI_PRE_CLONE_SCRIPT pipes the helper to bash instead of sh, because
   the helper has a bash shebang and uses [[ ]].
 - pip3 installs ci-templates over https instead of plain http.
 - download-git-cache.sh quotes the wget target and removes its
   temporary directory when the download fails.
 - diffstat and hunk headers are updated for the new line counts; the
   blob ids on the index lines are unchanged from the original
   submission.

 .gitlab-ci.yml                   | 56 +++++++++++++++++++++++++++++-
 .gitlab-ci/download-git-cache.sh | 43 ++++++++++++++++++++++++
 2 files changed, 98 insertions(+), 1 deletion(-)
 create mode 100644 .gitlab-ci/download-git-cache.sh

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index f5a9de33b76..12843417517 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -1,16 +1,27 @@
 variables:
   FDO_UPSTREAM_REPO: mesa/mesa
+  # Evaluated by the runners before they clone: fetch the cache helper
+  # from the commit under test and run it. The helper uses bash-only
+  # syntax ([[ ]]), so pipe it to bash rather than sh.
+  CI_PRE_CLONE_SCRIPT: |-
+    set -o xtrace
+    /usr/bin/wget -q -O- ${CI_PROJECT_URL}/-/raw/${CI_COMMIT_SHA}/.gitlab-ci/download-git-cache.sh | bash -
+    set +o xtrace
 
 include:
   - project: 'freedesktop/ci-templates'
-    ref: 59de540b620c45739871d1a073d76d5521989d11
+    ref: &ci-templates-sha 52dd4a94044449c8481d18dcdc221a3c636366d2
     file: '/templates/debian.yml'
+  - project: 'freedesktop/ci-templates'
+    ref: *ci-templates-sha
+    file: '/templates/alpine.yml'
   - local: '.gitlab-ci/lava-gitlab-ci.yml'
   - local: '.gitlab-ci/test-source-dep.yml'
 
 stages:
   - container+docs
   - container-2
+  - git-archive
   - deploy
   - meson-x86_64
   - scons
@@ -388,6 +399,49 @@ arm64_test:
   needs:
     - windows_build_vs2019
 
+# Scheduled-only container image with git and the ci-templates python
+# package, used by the "make git archive" job below.
+git_archive:
+  extends: .fdo.container-build@alpine
+  stage: container+docs
+  only:
+    - schedules
+  variables:
+    FDO_REPO_SUFFIX: &git-archive-suffix "alpine/git_archive"
+    # YAML aliases cannot be expanded inside a string, so keep this sha
+    # in sync with the ci-templates-sha anchor in the include section
+    FDO_DISTRIBUTION_EXEC: 'pip3 install git+https://gitlab.freedesktop.org/freedesktop/ci-templates@52dd4a94044449c8481d18dcdc221a3c636366d2'
+    # no need to pull the whole repo to build the container image
+    GIT_STRATEGY: none
+    FDO_DISTRIBUTION_TAG: &git-archive-tag "2020-07-02"
+    FDO_DISTRIBUTION_PACKAGES: git py3-pip
+
+
+# Git archive
+
+make git archive:
+  stage: git-archive
+  extends: .fdo.suffixed-image@alpine
+  only:
+    - schedules
+  # ensure we are running on packet
+  tags:
+    - packet.net
+  variables:
+    FDO_DISTRIBUTION_TAG: *git-archive-tag
+    FDO_REPO_SUFFIX: *git-archive-suffix
+  needs:
+    - git_archive
+
+  script:
+    # compress the current folder
+    - tar -cvzf ../$CI_PROJECT_NAME.tar.gz .
+
+    # login with the JWT token
+    - ci-fairy minio login $CI_JOB_JWT
+    - ci-fairy minio cp ../$CI_PROJECT_NAME.tar.gz minio://minio-packet.freedesktop.org/git-cache/$CI_PROJECT_NAMESPACE/$CI_PROJECT_NAME/$CI_PROJECT_NAME.tar.gz
+
+
 # BUILD
 
 # Shared between windows and Linux
diff --git a/.gitlab-ci/download-git-cache.sh b/.gitlab-ci/download-git-cache.sh
new file mode 100644
index 00000000000..693925e975b
--- /dev/null
+++ b/.gitlab-ci/download-git-cache.sh
@@ -0,0 +1,43 @@
+#!/bin/bash
+#
+# Seed $CI_PROJECT_DIR with the archived git tree from the MinIO cache,
+# unless a repository is already present there.
+# Run with bash: the [[ ]] tests below are not POSIX sh.
+
+set +e
+set -o xtrace
+
+# if we run this script outside of gitlab-ci for testing, ensure
+# we got meaningful variables
+CI_PROJECT_DIR=${CI_PROJECT_DIR:-$(mktemp -d)/mesa}
+
+if [[ -e $CI_PROJECT_DIR/.git ]]
+then
+    echo "Repository already present, skip cache download"
+    exit
+fi
+
+TMP_DIR=$(mktemp -d)
+
+echo "Downloading archived master..."
+/usr/bin/wget -O "$TMP_DIR/mesa.tar.gz" \
+              https://minio-packet.freedesktop.org/git-cache/mesa/mesa/mesa.tar.gz
+
+# check wget error code
+if [[ $? -ne 0 ]]
+then
+    echo "Repository cache not available"
+    # best effort: drop the scratch directory and exit successfully so
+    # that a missing cache does not turn into a script failure
+    rm -rf "$TMP_DIR"
+    exit 0
+fi
+
+set -e
+
+rm -rf "$CI_PROJECT_DIR"
+echo "Extracting tarball into '$CI_PROJECT_DIR'..."
+mkdir -p "$CI_PROJECT_DIR"
+tar xzf "$TMP_DIR/mesa.tar.gz" -C "$CI_PROJECT_DIR"
+rm -rf "$TMP_DIR"
+chmod a+w "$CI_PROJECT_DIR"
-- 
2.30.2